[llvm] [RISCV] Re-model RVV comparison instructions (PR #88868)

Pengcheng Wang via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 16 03:28:18 PDT 2024


https://github.com/wangpc-pp created https://github.com/llvm/llvm-project/pull/88868

We remove the `@earlyclobber` constraint.

Instead, we add `RegisterClass`es that contain only the registers which can
be the lowest-numbered LMUL1 register of a register group at each `LMUL`.
We use them as the output operand of comparison instructions to match the
constraint:

> The destination EEW is smaller than the source EEW and the overlap
> is in the lowest-numbered part of the source register group.
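
For illustration (a hand-written sketch based on the quoted rule; the register
numbers are arbitrary and not taken from the tests below): a comparison
produces a mask, so its destination EEW is 1 and it occupies a single register,
while an LMUL>1 source group spans several registers. The rule therefore only
allows the destination to overlap the first register of a source group. The new
`VMM2`/`VMM4`/`VMM8` classes contain only LMUL-aligned registers, so whatever
register the allocator picks either does not overlap a source group at all or
overlaps exactly its lowest-numbered part, which is why the `@earlyclobber`
constraint is no longer needed:

```asm
vsetvli t0, a0, e32, m4, ta, ma   # LMUL=4: a source group spans 4 registers
vmslt.vv v8, v8, v12    # OK: v8 overlaps only the lowest-numbered register
                        #     of the source group v8..v11
# vmslt.vv v9, v8, v12  # not allowed: the overlap (v9) is not in the
                        #     lowest-numbered part of v8..v11
vmslt.vv v0, v8, v12    # OK: no overlap; v0 is listed first in the new
                        #     classes since the result is often used as a mask
```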

The benefits:
* From the test diffs, we can see improvements (and there are some
  regressions in LMUL8 tests). Of course, this conclusion is based on
  unit tests; we should evaluate it on some benchmarks.
* This change makes #88496 possible (though I think a fine-grained
  earlyclobber mechanism would be the right long-term approach).
* If we agree, we can model other instructions in this way.


From d72e50aae48ffed5fb6c1a9ad6bfc47c5ca93230 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Tue, 16 Apr 2024 18:27:56 +0800
Subject: [PATCH] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.6-beta.1
---
 .../RISCV/GISel/RISCVRegisterBankInfo.cpp     |   16 +-
 .../Target/RISCV/RISCVInstrInfoVPseudos.td    |   22 +-
 llvm/lib/Target/RISCV/RISCVRegisterInfo.td    |    6 +
 .../instruction-select/rvv/icmp.mir           |  136 +-
 .../CodeGen/RISCV/rvv/active_lane_mask.ll     |   44 +-
 llvm/test/CodeGen/RISCV/rvv/binop-splats.ll   |   18 +-
 llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll  |    4 +-
 llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll       |    4 +-
 llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll        |  126 +-
 llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll  |   28 +-
 .../RISCV/rvv/fixed-vectors-binop-splats.ll   |   12 +-
 .../RISCV/rvv/fixed-vectors-bitreverse-vp.ll  |    4 +-
 .../RISCV/rvv/fixed-vectors-bswap-vp.ll       |    4 +-
 .../RISCV/rvv/fixed-vectors-ceil-vp.ll        |  113 +-
 .../RISCV/rvv/fixed-vectors-extract-i1.ll     |   96 +-
 .../RISCV/rvv/fixed-vectors-floor-vp.ll       |  113 +-
 .../RISCV/rvv/fixed-vectors-fmaximum-vp.ll    |  221 +-
 .../RISCV/rvv/fixed-vectors-fmaximum.ll       |   74 +-
 .../RISCV/rvv/fixed-vectors-fminimum-vp.ll    |  221 +-
 .../RISCV/rvv/fixed-vectors-fminimum.ll       |   74 +-
 .../RISCV/rvv/fixed-vectors-fp-setcc.ll       |  276 +-
 .../RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll |    5 +-
 .../RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll |    5 +-
 .../RISCV/rvv/fixed-vectors-int-setcc.ll      |   96 +-
 .../rvv/fixed-vectors-interleave-store.ll     |    4 +-
 .../RISCV/rvv/fixed-vectors-mask-splat.ll     |    8 +-
 .../RISCV/rvv/fixed-vectors-masked-load-fp.ll |   32 +-
 .../rvv/fixed-vectors-masked-load-int.ll      |   28 +-
 .../rvv/fixed-vectors-masked-store-fp.ll      |  152 +-
 .../rvv/fixed-vectors-masked-store-int.ll     |  142 +-
 .../RISCV/rvv/fixed-vectors-nearbyint-vp.ll   |  119 +-
 .../RISCV/rvv/fixed-vectors-reduction-fp.ll   |  496 ++-
 .../rvv/fixed-vectors-reduction-int-vp.ll     |   44 +-
 .../RISCV/rvv/fixed-vectors-rint-vp.ll        |   89 +-
 .../RISCV/rvv/fixed-vectors-round-vp.ll       |  113 +-
 .../RISCV/rvv/fixed-vectors-roundeven-vp.ll   |  113 +-
 .../RISCV/rvv/fixed-vectors-roundtozero-vp.ll |  113 +-
 .../RISCV/rvv/fixed-vectors-setcc-fp-vp.ll    |  412 +--
 .../RISCV/rvv/fixed-vectors-setcc-int-vp.ll   |  459 +--
 .../RISCV/rvv/fixed-vectors-trunc-vp.ll       |   50 +-
 .../RISCV/rvv/fixed-vectors-vfclass-vp.ll     |   35 +-
 .../fixed-vectors-vfcmp-constrained-sdnode.ll | 2268 +++++-------
 ...fixed-vectors-vfcmps-constrained-sdnode.ll |  972 ++---
 .../RISCV/rvv/fixed-vectors-vpmerge.ll        |    8 +-
 .../RISCV/rvv/fixed-vectors-vselect-vp.ll     |   20 +-
 llvm/test/CodeGen/RISCV/rvv/floor-vp.ll       |  126 +-
 .../test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll |  198 +-
 llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll    |  581 ++-
 .../test/CodeGen/RISCV/rvv/fminimum-sdnode.ll |  198 +-
 llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll    |  581 ++-
 .../test/CodeGen/RISCV/rvv/mscatter-sdnode.ll |   40 +-
 llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll   |  284 +-
 llvm/test/CodeGen/RISCV/rvv/rint-vp.ll        |  202 +-
 llvm/test/CodeGen/RISCV/rvv/round-vp.ll       |  202 +-
 llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll   |  202 +-
 llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll |  202 +-
 llvm/test/CodeGen/RISCV/rvv/select-int.ll     |   36 +-
 llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll    |  718 ++--
 llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll       |  592 +--
 llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll   |  470 +--
 llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll  |    4 +-
 llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll   |   24 +-
 .../RISCV/rvv/vector-deinterleave-load.ll     |    4 +-
 .../RISCV/rvv/vector-interleave-store.ll      |   35 +-
 .../CodeGen/RISCV/rvv/vector-interleave.ll    |    8 +-
 .../RISCV/rvv/vfcmp-constrained-sdnode.ll     | 3282 ++++++++---------
 .../RISCV/rvv/vfcmps-constrained-sdnode.ll    | 1458 ++++----
 llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll  |   16 +-
 .../test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll |    5 +-
 .../test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll |    5 +-
 llvm/test/CodeGen/RISCV/rvv/vmfeq.ll          |   78 +-
 llvm/test/CodeGen/RISCV/rvv/vmfge.ll          |   78 +-
 llvm/test/CodeGen/RISCV/rvv/vmfgt.ll          |   78 +-
 llvm/test/CodeGen/RISCV/rvv/vmfle.ll          |   78 +-
 llvm/test/CodeGen/RISCV/rvv/vmflt.ll          |   78 +-
 llvm/test/CodeGen/RISCV/rvv/vmfne.ll          |   78 +-
 llvm/test/CodeGen/RISCV/rvv/vmseq.ll          |  164 +-
 llvm/test/CodeGen/RISCV/rvv/vmsge.ll          |  240 +-
 llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll         |  240 +-
 llvm/test/CodeGen/RISCV/rvv/vmsgt.ll          |  164 +-
 llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll         |  164 +-
 llvm/test/CodeGen/RISCV/rvv/vmsle.ll          |  164 +-
 llvm/test/CodeGen/RISCV/rvv/vmsleu.ll         |  164 +-
 llvm/test/CodeGen/RISCV/rvv/vmslt.ll          |  164 +-
 llvm/test/CodeGen/RISCV/rvv/vmsltu.ll         |  164 +-
 llvm/test/CodeGen/RISCV/rvv/vmsne.ll          |  164 +-
 .../test/CodeGen/RISCV/rvv/vp-reverse-mask.ll |    9 +-
 .../RISCV/rvv/vp-splice-mask-vectors.ll       |    9 +-
 llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll     |   32 +-
 llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll |    5 +-
 90 files changed, 8900 insertions(+), 10283 deletions(-)

diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
index 86e44343b50865..ca77a9729e03b9 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
@@ -110,6 +110,8 @@ RISCVRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
                                               LLT Ty) const {
   switch (RC.getID()) {
   default:
+    if (RISCVRI::isVRegClass(RC.TSFlags))
+      return getRegBank(RISCV::VRBRegBankID);
     llvm_unreachable("Register class not supported");
   case RISCV::GPRRegClassID:
   case RISCV::GPRF16RegClassID:
@@ -131,20 +133,6 @@ RISCVRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
   case RISCV::FPR64CRegClassID:
   case RISCV::FPR32CRegClassID:
     return getRegBank(RISCV::FPRBRegBankID);
-  case RISCV::VMRegClassID:
-  case RISCV::VRRegClassID:
-  case RISCV::VRNoV0RegClassID:
-  case RISCV::VRM2RegClassID:
-  case RISCV::VRM2NoV0RegClassID:
-  case RISCV::VRM4RegClassID:
-  case RISCV::VRM4NoV0RegClassID:
-  case RISCV::VMV0RegClassID:
-  case RISCV::VRM2_with_sub_vrm1_0_in_VMV0RegClassID:
-  case RISCV::VRM4_with_sub_vrm1_0_in_VMV0RegClassID:
-  case RISCV::VRM8RegClassID:
-  case RISCV::VRM8NoV0RegClassID:
-  case RISCV::VRM8_with_sub_vrm1_0_in_VMV0RegClassID:
-    return getRegBank(RISCV::VRBRegBankID);
   }
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index ad1821d57256bc..686bfd1af0d062 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -143,22 +143,24 @@ class PseudoToVInst<string PseudoInst> {
 
 // This class describes information associated to the LMUL.
 class LMULInfo<int lmul, int oct, VReg regclass, VReg wregclass,
-               VReg f2regclass, VReg f4regclass, VReg f8regclass, string mx> {
+               VReg f2regclass, VReg f4regclass, VReg f8regclass, string mx,
+               VReg moutregclass = VMM1> {
   bits<3> value = lmul; // This is encoded as the vlmul field of vtype.
   VReg vrclass = regclass;
   VReg wvrclass = wregclass;
   VReg f8vrclass = f8regclass;
   VReg f4vrclass = f4regclass;
   VReg f2vrclass = f2regclass;
+  VReg moutclass = moutregclass;
   string MX = mx;
   int octuple = oct;
 }
 
 // Associate LMUL with tablegen records of register classes.
 def V_M1  : LMULInfo<0b000,  8,   VR,        VRM2,   VR,   VR, VR, "M1">;
-def V_M2  : LMULInfo<0b001, 16, VRM2,        VRM4,   VR,   VR, VR, "M2">;
-def V_M4  : LMULInfo<0b010, 32, VRM4,        VRM8, VRM2,   VR, VR, "M4">;
-def V_M8  : LMULInfo<0b011, 64, VRM8,/*NoVReg*/VR, VRM4, VRM2, VR, "M8">;
+def V_M2  : LMULInfo<0b001, 16, VRM2,        VRM4,   VR,   VR, VR, "M2", VMM2>;
+def V_M4  : LMULInfo<0b010, 32, VRM4,        VRM8, VRM2,   VR, VR, "M4", VMM4>;
+def V_M8  : LMULInfo<0b011, 64, VRM8,/*NoVReg*/VR, VRM4, VRM2, VR, "M8", VMM8>;
 
 def V_MF8 : LMULInfo<0b101, 1, VR, VR,/*NoVReg*/VR,/*NoVReg*/VR,/*NoVReg*/VR, "MF8">;
 def V_MF4 : LMULInfo<0b110, 2, VR, VR,          VR,/*NoVReg*/VR,/*NoVReg*/VR, "MF4">;
@@ -2668,25 +2670,21 @@ multiclass PseudoVEXT_VF8 {
 // With LMUL<=1 the source and dest occupy a single register so any overlap
 // is in the lowest-numbered part.
 multiclass VPseudoBinaryM_VV<LMULInfo m, int TargetConstraintType = 1> {
-  defm _VV : VPseudoBinaryM<VR, m.vrclass, m.vrclass, m,
-                            !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>;
+  defm _VV : VPseudoBinaryM<m.moutclass, m.vrclass, m.vrclass, m, "", TargetConstraintType>;
 }
 
 multiclass VPseudoBinaryM_VX<LMULInfo m, int TargetConstraintType = 1> {
   defm "_VX" :
-    VPseudoBinaryM<VR, m.vrclass, GPR, m,
-                   !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>;
+    VPseudoBinaryM<m.moutclass, m.vrclass, GPR, m, "", TargetConstraintType>;
 }
 
 multiclass VPseudoBinaryM_VF<LMULInfo m, FPR_Info f, int TargetConstraintType = 1> {
   defm "_V" # f.FX :
-    VPseudoBinaryM<VR, m.vrclass, f.fprclass, m,
-                   !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>;
+    VPseudoBinaryM<m.moutclass, m.vrclass, f.fprclass, m, "", TargetConstraintType>;
 }
 
 multiclass VPseudoBinaryM_VI<LMULInfo m, int TargetConstraintType = 1> {
-  defm _VI : VPseudoBinaryM<VR, m.vrclass, simm5, m,
-                            !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>;
+  defm _VI : VPseudoBinaryM<m.moutclass, m.vrclass, simm5, m, "", TargetConstraintType>;
 }
 
 multiclass VPseudoVGTR_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 316daf2763ca1e..1a0533c7072705 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -533,6 +533,12 @@ def VR : VReg<!listconcat(VM1VTs, VMaskVTs),
               (add (sequence "V%u", 8, 31),
                    (sequence "V%u", 7, 0)), 1>;
 
+// V0 is likely to be used as a mask, so put it first in the allocation order.
+def VMM1 : VReg<VMaskVTs, (add (sequence "V%u", 0, 31)), 1>;
+def VMM2 : VReg<VMaskVTs, (add (sequence "V%u", 0, 31, 2)), 1>;
+def VMM4 : VReg<VMaskVTs, (add (sequence "V%u", 0, 31, 4)), 1>;
+def VMM8 : VReg<VMaskVTs, (add (sequence "V%u", 0, 31, 8)), 1>;
+
 def VRNoV0 : VReg<!listconcat(VM1VTs, VMaskVTs), (sub VR, V0), 1>;
 
 def VRM2 : VReg<VM2VTs, (add (sequence "V%uM2", 8, 31, 2),
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/icmp.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/icmp.mir
index df0d48aac92551..0677232fa60677 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/icmp.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/icmp.mir
@@ -13,13 +13,13 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: icmp_nxv1i8
     ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
-    ; RV32I-NEXT: [[PseudoVMSLTU_VV_MF8_:%[0-9]+]]:vr = PseudoVMSLTU_VV_MF8 [[DEF]], [[DEF]], -1, 3 /* e8 */
+    ; RV32I-NEXT: [[PseudoVMSLTU_VV_MF8_:%[0-9]+]]:vmm1 = PseudoVMSLTU_VV_MF8 [[DEF]], [[DEF]], -1, 3 /* e8 */
     ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_MF8_]]
     ; RV32I-NEXT: PseudoRET implicit $v8
     ;
     ; RV64I-LABEL: name: icmp_nxv1i8
     ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
-    ; RV64I-NEXT: [[PseudoVMSLTU_VV_MF8_:%[0-9]+]]:vr = PseudoVMSLTU_VV_MF8 [[DEF]], [[DEF]], -1, 3 /* e8 */
+    ; RV64I-NEXT: [[PseudoVMSLTU_VV_MF8_:%[0-9]+]]:vmm1 = PseudoVMSLTU_VV_MF8 [[DEF]], [[DEF]], -1, 3 /* e8 */
     ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_MF8_]]
     ; RV64I-NEXT: PseudoRET implicit $v8
     %0:vrb(<vscale x 1 x s8>) = G_IMPLICIT_DEF
@@ -37,13 +37,13 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: icmp_nxv2i8
     ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
-    ; RV32I-NEXT: [[PseudoVMSLT_VV_MF4_:%[0-9]+]]:vr = PseudoVMSLT_VV_MF4 [[DEF]], [[DEF]], -1, 3 /* e8 */
+    ; RV32I-NEXT: [[PseudoVMSLT_VV_MF4_:%[0-9]+]]:vmm1 = PseudoVMSLT_VV_MF4 [[DEF]], [[DEF]], -1, 3 /* e8 */
     ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLT_VV_MF4_]]
     ; RV32I-NEXT: PseudoRET implicit $v8
     ;
     ; RV64I-LABEL: name: icmp_nxv2i8
     ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
-    ; RV64I-NEXT: [[PseudoVMSLT_VV_MF4_:%[0-9]+]]:vr = PseudoVMSLT_VV_MF4 [[DEF]], [[DEF]], -1, 3 /* e8 */
+    ; RV64I-NEXT: [[PseudoVMSLT_VV_MF4_:%[0-9]+]]:vmm1 = PseudoVMSLT_VV_MF4 [[DEF]], [[DEF]], -1, 3 /* e8 */
     ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLT_VV_MF4_]]
     ; RV64I-NEXT: PseudoRET implicit $v8
     %0:vrb(<vscale x 2 x s8>) = G_IMPLICIT_DEF
@@ -61,13 +61,13 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: icmp_nxv4i8
     ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
-    ; RV32I-NEXT: [[PseudoVMSLEU_VV_MF2_:%[0-9]+]]:vr = PseudoVMSLEU_VV_MF2 [[DEF]], [[DEF]], -1, 3 /* e8 */
+    ; RV32I-NEXT: [[PseudoVMSLEU_VV_MF2_:%[0-9]+]]:vmm1 = PseudoVMSLEU_VV_MF2 [[DEF]], [[DEF]], -1, 3 /* e8 */
     ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLEU_VV_MF2_]]
     ; RV32I-NEXT: PseudoRET implicit $v8
     ;
     ; RV64I-LABEL: name: icmp_nxv4i8
     ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
-    ; RV64I-NEXT: [[PseudoVMSLEU_VV_MF2_:%[0-9]+]]:vr = PseudoVMSLEU_VV_MF2 [[DEF]], [[DEF]], -1, 3 /* e8 */
+    ; RV64I-NEXT: [[PseudoVMSLEU_VV_MF2_:%[0-9]+]]:vmm1 = PseudoVMSLEU_VV_MF2 [[DEF]], [[DEF]], -1, 3 /* e8 */
     ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLEU_VV_MF2_]]
     ; RV64I-NEXT: PseudoRET implicit $v8
     %0:vrb(<vscale x 4 x s8>) = G_IMPLICIT_DEF
@@ -85,13 +85,13 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: icmp_nxv8i8
     ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
-    ; RV32I-NEXT: [[PseudoVMSLE_VV_M1_:%[0-9]+]]:vr = PseudoVMSLE_VV_M1 [[DEF]], [[DEF]], -1, 3 /* e8 */
+    ; RV32I-NEXT: [[PseudoVMSLE_VV_M1_:%[0-9]+]]:vmm1 = PseudoVMSLE_VV_M1 [[DEF]], [[DEF]], -1, 3 /* e8 */
     ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_M1_]]
     ; RV32I-NEXT: PseudoRET implicit $v8
     ;
     ; RV64I-LABEL: name: icmp_nxv8i8
     ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
-    ; RV64I-NEXT: [[PseudoVMSLE_VV_M1_:%[0-9]+]]:vr = PseudoVMSLE_VV_M1 [[DEF]], [[DEF]], -1, 3 /* e8 */
+    ; RV64I-NEXT: [[PseudoVMSLE_VV_M1_:%[0-9]+]]:vmm1 = PseudoVMSLE_VV_M1 [[DEF]], [[DEF]], -1, 3 /* e8 */
     ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_M1_]]
     ; RV64I-NEXT: PseudoRET implicit $v8
     %0:vrb(<vscale x 8 x s8>) = G_IMPLICIT_DEF
@@ -109,14 +109,14 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: icmp_nxv16i8
     ; RV32I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
-    ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 3 /* e8 */
-    ; RV32I-NEXT: $v8 = COPY %1
+    ; RV32I-NEXT: [[PseudoVMSLTU_VV_M2_:%[0-9]+]]:vmm2 = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 3 /* e8 */
+    ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M2_]]
     ; RV32I-NEXT: PseudoRET implicit $v8
     ;
     ; RV64I-LABEL: name: icmp_nxv16i8
     ; RV64I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
-    ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 3 /* e8 */
-    ; RV64I-NEXT: $v8 = COPY %1
+    ; RV64I-NEXT: [[PseudoVMSLTU_VV_M2_:%[0-9]+]]:vmm2 = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 3 /* e8 */
+    ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M2_]]
     ; RV64I-NEXT: PseudoRET implicit $v8
     %0:vrb(<vscale x 16 x s8>) = G_IMPLICIT_DEF
     %1:vrb(<vscale x 16 x s1>) = G_ICMP intpred(ugt), %0(<vscale x 16 x s8>), %0
@@ -133,14 +133,14 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: icmp_nxv32i8
     ; RV32I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
-    ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 3 /* e8 */
-    ; RV32I-NEXT: $v8 = COPY %1
+    ; RV32I-NEXT: [[PseudoVMSLT_VV_M4_:%[0-9]+]]:vmm4 = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 3 /* e8 */
+    ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLT_VV_M4_]]
     ; RV32I-NEXT: PseudoRET implicit $v8
     ;
     ; RV64I-LABEL: name: icmp_nxv32i8
     ; RV64I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
-    ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 3 /* e8 */
-    ; RV64I-NEXT: $v8 = COPY %1
+    ; RV64I-NEXT: [[PseudoVMSLT_VV_M4_:%[0-9]+]]:vmm4 = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 3 /* e8 */
+    ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLT_VV_M4_]]
     ; RV64I-NEXT: PseudoRET implicit $v8
     %0:vrb(<vscale x 32 x s8>) = G_IMPLICIT_DEF
     %1:vrb(<vscale x 32 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 32 x s8>), %0
@@ -157,14 +157,14 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: icmp_nxv64i8
     ; RV32I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
-    ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 3 /* e8 */
-    ; RV32I-NEXT: $v8 = COPY %1
+    ; RV32I-NEXT: [[PseudoVMSLEU_VV_M8_:%[0-9]+]]:vmm8 = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 3 /* e8 */
+    ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLEU_VV_M8_]]
     ; RV32I-NEXT: PseudoRET implicit $v8
     ;
     ; RV64I-LABEL: name: icmp_nxv64i8
     ; RV64I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
-    ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 3 /* e8 */
-    ; RV64I-NEXT: $v8 = COPY %1
+    ; RV64I-NEXT: [[PseudoVMSLEU_VV_M8_:%[0-9]+]]:vmm8 = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 3 /* e8 */
+    ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLEU_VV_M8_]]
     ; RV64I-NEXT: PseudoRET implicit $v8
     %0:vrb(<vscale x 64 x s8>) = G_IMPLICIT_DEF
     %1:vrb(<vscale x 64 x s1>) = G_ICMP intpred(ule), %0(<vscale x 64 x s8>), %0
@@ -181,13 +181,13 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: icmp_nxv1i16
     ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
-    ; RV32I-NEXT: [[PseudoVMSLE_VV_MF4_:%[0-9]+]]:vr = PseudoVMSLE_VV_MF4 [[DEF]], [[DEF]], -1, 4 /* e16 */
+    ; RV32I-NEXT: [[PseudoVMSLE_VV_MF4_:%[0-9]+]]:vmm1 = PseudoVMSLE_VV_MF4 [[DEF]], [[DEF]], -1, 4 /* e16 */
     ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_MF4_]]
     ; RV32I-NEXT: PseudoRET implicit $v8
     ;
     ; RV64I-LABEL: name: icmp_nxv1i16
     ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
-    ; RV64I-NEXT: [[PseudoVMSLE_VV_MF4_:%[0-9]+]]:vr = PseudoVMSLE_VV_MF4 [[DEF]], [[DEF]], -1, 4 /* e16 */
+    ; RV64I-NEXT: [[PseudoVMSLE_VV_MF4_:%[0-9]+]]:vmm1 = PseudoVMSLE_VV_MF4 [[DEF]], [[DEF]], -1, 4 /* e16 */
     ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_MF4_]]
     ; RV64I-NEXT: PseudoRET implicit $v8
     %0:vrb(<vscale x 1 x s16>) = G_IMPLICIT_DEF
@@ -205,13 +205,13 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: icmp_nxv2i16
     ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
-    ; RV32I-NEXT: [[PseudoVMSNE_VV_MF2_:%[0-9]+]]:vr = PseudoVMSNE_VV_MF2 [[DEF]], [[DEF]], -1, 4 /* e16 */
+    ; RV32I-NEXT: [[PseudoVMSNE_VV_MF2_:%[0-9]+]]:vmm1 = PseudoVMSNE_VV_MF2 [[DEF]], [[DEF]], -1, 4 /* e16 */
     ; RV32I-NEXT: $v8 = COPY [[PseudoVMSNE_VV_MF2_]]
     ; RV32I-NEXT: PseudoRET implicit $v8
     ;
     ; RV64I-LABEL: name: icmp_nxv2i16
     ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
-    ; RV64I-NEXT: [[PseudoVMSNE_VV_MF2_:%[0-9]+]]:vr = PseudoVMSNE_VV_MF2 [[DEF]], [[DEF]], -1, 4 /* e16 */
+    ; RV64I-NEXT: [[PseudoVMSNE_VV_MF2_:%[0-9]+]]:vmm1 = PseudoVMSNE_VV_MF2 [[DEF]], [[DEF]], -1, 4 /* e16 */
     ; RV64I-NEXT: $v8 = COPY [[PseudoVMSNE_VV_MF2_]]
     ; RV64I-NEXT: PseudoRET implicit $v8
     %0:vrb(<vscale x 2 x s16>) = G_IMPLICIT_DEF
@@ -229,13 +229,13 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: icmp_nxv4i16
     ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
-    ; RV32I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 4 /* e16 */
+    ; RV32I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vmm1 = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 4 /* e16 */
     ; RV32I-NEXT: $v8 = COPY [[PseudoVMSEQ_VV_M1_]]
     ; RV32I-NEXT: PseudoRET implicit $v8
     ;
     ; RV64I-LABEL: name: icmp_nxv4i16
     ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
-    ; RV64I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 4 /* e16 */
+    ; RV64I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vmm1 = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 4 /* e16 */
     ; RV64I-NEXT: $v8 = COPY [[PseudoVMSEQ_VV_M1_]]
     ; RV64I-NEXT: PseudoRET implicit $v8
     %0:vrb(<vscale x 4 x s16>) = G_IMPLICIT_DEF
@@ -253,14 +253,14 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: icmp_nxv8i16
     ; RV32I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
-    ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 4 /* e16 */
-    ; RV32I-NEXT: $v8 = COPY %1
+    ; RV32I-NEXT: [[PseudoVMSLTU_VV_M2_:%[0-9]+]]:vmm2 = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 4 /* e16 */
+    ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M2_]]
     ; RV32I-NEXT: PseudoRET implicit $v8
     ;
     ; RV64I-LABEL: name: icmp_nxv8i16
     ; RV64I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
-    ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 4 /* e16 */
-    ; RV64I-NEXT: $v8 = COPY %1
+    ; RV64I-NEXT: [[PseudoVMSLTU_VV_M2_:%[0-9]+]]:vmm2 = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 4 /* e16 */
+    ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M2_]]
     ; RV64I-NEXT: PseudoRET implicit $v8
     %0:vrb(<vscale x 8 x s16>) = G_IMPLICIT_DEF
     %1:vrb(<vscale x 8 x s1>) = G_ICMP intpred(ult), %0(<vscale x 8 x s16>), %0
@@ -277,14 +277,14 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: icmp_nxv16i16
     ; RV32I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
-    ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 4 /* e16 */
-    ; RV32I-NEXT: $v8 = COPY %1
+    ; RV32I-NEXT: [[PseudoVMSLT_VV_M4_:%[0-9]+]]:vmm4 = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 4 /* e16 */
+    ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLT_VV_M4_]]
     ; RV32I-NEXT: PseudoRET implicit $v8
     ;
     ; RV64I-LABEL: name: icmp_nxv16i16
     ; RV64I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
-    ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 4 /* e16 */
-    ; RV64I-NEXT: $v8 = COPY %1
+    ; RV64I-NEXT: [[PseudoVMSLT_VV_M4_:%[0-9]+]]:vmm4 = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 4 /* e16 */
+    ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLT_VV_M4_]]
     ; RV64I-NEXT: PseudoRET implicit $v8
     %0:vrb(<vscale x 16 x s16>) = G_IMPLICIT_DEF
     %1:vrb(<vscale x 16 x s1>) = G_ICMP intpred(slt), %0(<vscale x 16 x s16>), %0
@@ -301,14 +301,14 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: icmp_nxv32i16
     ; RV32I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
-    ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 4 /* e16 */
-    ; RV32I-NEXT: $v8 = COPY %1
+    ; RV32I-NEXT: [[PseudoVMSLEU_VV_M8_:%[0-9]+]]:vmm8 = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 4 /* e16 */
+    ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLEU_VV_M8_]]
     ; RV32I-NEXT: PseudoRET implicit $v8
     ;
     ; RV64I-LABEL: name: icmp_nxv32i16
     ; RV64I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
-    ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 4 /* e16 */
-    ; RV64I-NEXT: $v8 = COPY %1
+    ; RV64I-NEXT: [[PseudoVMSLEU_VV_M8_:%[0-9]+]]:vmm8 = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 4 /* e16 */
+    ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLEU_VV_M8_]]
     ; RV64I-NEXT: PseudoRET implicit $v8
     %0:vrb(<vscale x 32 x s16>) = G_IMPLICIT_DEF
     %1:vrb(<vscale x 32 x s1>) = G_ICMP intpred(uge), %0(<vscale x 32 x s16>), %0
@@ -325,13 +325,13 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: icmp_nxv1i32
     ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
-    ; RV32I-NEXT: [[PseudoVMSLE_VV_MF2_:%[0-9]+]]:vr = PseudoVMSLE_VV_MF2 [[DEF]], [[DEF]], -1, 5 /* e32 */
+    ; RV32I-NEXT: [[PseudoVMSLE_VV_MF2_:%[0-9]+]]:vmm1 = PseudoVMSLE_VV_MF2 [[DEF]], [[DEF]], -1, 5 /* e32 */
     ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_MF2_]]
     ; RV32I-NEXT: PseudoRET implicit $v8
     ;
     ; RV64I-LABEL: name: icmp_nxv1i32
     ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
-    ; RV64I-NEXT: [[PseudoVMSLE_VV_MF2_:%[0-9]+]]:vr = PseudoVMSLE_VV_MF2 [[DEF]], [[DEF]], -1, 5 /* e32 */
+    ; RV64I-NEXT: [[PseudoVMSLE_VV_MF2_:%[0-9]+]]:vmm1 = PseudoVMSLE_VV_MF2 [[DEF]], [[DEF]], -1, 5 /* e32 */
     ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_MF2_]]
     ; RV64I-NEXT: PseudoRET implicit $v8
     %0:vrb(<vscale x 1 x s32>) = G_IMPLICIT_DEF
@@ -349,13 +349,13 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: icmp_nxv2i32
     ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
-    ; RV32I-NEXT: [[PseudoVMSLTU_VV_M1_:%[0-9]+]]:vr = PseudoVMSLTU_VV_M1 [[DEF]], [[DEF]], -1, 5 /* e32 */
+    ; RV32I-NEXT: [[PseudoVMSLTU_VV_M1_:%[0-9]+]]:vmm1 = PseudoVMSLTU_VV_M1 [[DEF]], [[DEF]], -1, 5 /* e32 */
     ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M1_]]
     ; RV32I-NEXT: PseudoRET implicit $v8
     ;
     ; RV64I-LABEL: name: icmp_nxv2i32
     ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
-    ; RV64I-NEXT: [[PseudoVMSLTU_VV_M1_:%[0-9]+]]:vr = PseudoVMSLTU_VV_M1 [[DEF]], [[DEF]], -1, 5 /* e32 */
+    ; RV64I-NEXT: [[PseudoVMSLTU_VV_M1_:%[0-9]+]]:vmm1 = PseudoVMSLTU_VV_M1 [[DEF]], [[DEF]], -1, 5 /* e32 */
     ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M1_]]
     ; RV64I-NEXT: PseudoRET implicit $v8
     %0:vrb(<vscale x 2 x s32>) = G_IMPLICIT_DEF
@@ -373,14 +373,14 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: icmp_nxv4i32
     ; RV32I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
-    ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M2 [[DEF]], [[DEF]], -1, 5 /* e32 */
-    ; RV32I-NEXT: $v8 = COPY %1
+    ; RV32I-NEXT: [[PseudoVMSLT_VV_M2_:%[0-9]+]]:vmm2 = PseudoVMSLT_VV_M2 [[DEF]], [[DEF]], -1, 5 /* e32 */
+    ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLT_VV_M2_]]
     ; RV32I-NEXT: PseudoRET implicit $v8
     ;
     ; RV64I-LABEL: name: icmp_nxv4i32
     ; RV64I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
-    ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M2 [[DEF]], [[DEF]], -1, 5 /* e32 */
-    ; RV64I-NEXT: $v8 = COPY %1
+    ; RV64I-NEXT: [[PseudoVMSLT_VV_M2_:%[0-9]+]]:vmm2 = PseudoVMSLT_VV_M2 [[DEF]], [[DEF]], -1, 5 /* e32 */
+    ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLT_VV_M2_]]
     ; RV64I-NEXT: PseudoRET implicit $v8
     %0:vrb(<vscale x 4 x s32>) = G_IMPLICIT_DEF
     %1:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 4 x s32>), %0
@@ -397,14 +397,14 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: icmp_nxv8i32
     ; RV32I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
-    ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M4 [[DEF]], [[DEF]], -1, 5 /* e32 */
-    ; RV32I-NEXT: $v8 = COPY %1
+    ; RV32I-NEXT: [[PseudoVMSLEU_VV_M4_:%[0-9]+]]:vmm4 = PseudoVMSLEU_VV_M4 [[DEF]], [[DEF]], -1, 5 /* e32 */
+    ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLEU_VV_M4_]]
     ; RV32I-NEXT: PseudoRET implicit $v8
     ;
     ; RV64I-LABEL: name: icmp_nxv8i32
     ; RV64I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
-    ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M4 [[DEF]], [[DEF]], -1, 5 /* e32 */
-    ; RV64I-NEXT: $v8 = COPY %1
+    ; RV64I-NEXT: [[PseudoVMSLEU_VV_M4_:%[0-9]+]]:vmm4 = PseudoVMSLEU_VV_M4 [[DEF]], [[DEF]], -1, 5 /* e32 */
+    ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLEU_VV_M4_]]
     ; RV64I-NEXT: PseudoRET implicit $v8
     %0:vrb(<vscale x 8 x s32>) = G_IMPLICIT_DEF
     %1:vrb(<vscale x 8 x s1>) = G_ICMP intpred(ule), %0(<vscale x 8 x s32>), %0
@@ -421,14 +421,14 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: icmp_nxv16i32
     ; RV32I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
-    ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLE_VV_M8 [[DEF]], [[DEF]], -1, 5 /* e32 */
-    ; RV32I-NEXT: $v8 = COPY %1
+    ; RV32I-NEXT: [[PseudoVMSLE_VV_M8_:%[0-9]+]]:vmm8 = PseudoVMSLE_VV_M8 [[DEF]], [[DEF]], -1, 5 /* e32 */
+    ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_M8_]]
     ; RV32I-NEXT: PseudoRET implicit $v8
     ;
     ; RV64I-LABEL: name: icmp_nxv16i32
     ; RV64I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
-    ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLE_VV_M8 [[DEF]], [[DEF]], -1, 5 /* e32 */
-    ; RV64I-NEXT: $v8 = COPY %1
+    ; RV64I-NEXT: [[PseudoVMSLE_VV_M8_:%[0-9]+]]:vmm8 = PseudoVMSLE_VV_M8 [[DEF]], [[DEF]], -1, 5 /* e32 */
+    ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_M8_]]
     ; RV64I-NEXT: PseudoRET implicit $v8
     %0:vrb(<vscale x 16 x s32>) = G_IMPLICIT_DEF
     %1:vrb(<vscale x 16 x s1>) = G_ICMP intpred(sle), %0(<vscale x 16 x s32>), %0
@@ -445,13 +445,13 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: icmp_nxv1i64
     ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
-    ; RV32I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 6 /* e64 */
+    ; RV32I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vmm1 = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 6 /* e64 */
     ; RV32I-NEXT: $v8 = COPY [[PseudoVMSEQ_VV_M1_]]
     ; RV32I-NEXT: PseudoRET implicit $v8
     ;
     ; RV64I-LABEL: name: icmp_nxv1i64
     ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
-    ; RV64I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 6 /* e64 */
+    ; RV64I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vmm1 = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 6 /* e64 */
     ; RV64I-NEXT: $v8 = COPY [[PseudoVMSEQ_VV_M1_]]
     ; RV64I-NEXT: PseudoRET implicit $v8
     %0:vrb(<vscale x 1 x s64>) = G_IMPLICIT_DEF
@@ -469,14 +469,14 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: icmp_nxv2i64
     ; RV32I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
-    ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSNE_VV_M2 [[DEF]], [[DEF]], -1, 6 /* e64 */
-    ; RV32I-NEXT: $v8 = COPY %1
+    ; RV32I-NEXT: [[PseudoVMSNE_VV_M2_:%[0-9]+]]:vmm2 = PseudoVMSNE_VV_M2 [[DEF]], [[DEF]], -1, 6 /* e64 */
+    ; RV32I-NEXT: $v8 = COPY [[PseudoVMSNE_VV_M2_]]
     ; RV32I-NEXT: PseudoRET implicit $v8
     ;
     ; RV64I-LABEL: name: icmp_nxv2i64
     ; RV64I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
-    ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSNE_VV_M2 [[DEF]], [[DEF]], -1, 6 /* e64 */
-    ; RV64I-NEXT: $v8 = COPY %1
+    ; RV64I-NEXT: [[PseudoVMSNE_VV_M2_:%[0-9]+]]:vmm2 = PseudoVMSNE_VV_M2 [[DEF]], [[DEF]], -1, 6 /* e64 */
+    ; RV64I-NEXT: $v8 = COPY [[PseudoVMSNE_VV_M2_]]
     ; RV64I-NEXT: PseudoRET implicit $v8
     %0:vrb(<vscale x 2 x s64>) = G_IMPLICIT_DEF
     %1:vrb(<vscale x 2 x s1>) = G_ICMP intpred(ne), %0(<vscale x 2 x s64>), %0
@@ -493,14 +493,14 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: icmp_nxv4i64
     ; RV32I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
-    ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M4 [[DEF]], [[DEF]], -1, 6 /* e64 */
-    ; RV32I-NEXT: $v8 = COPY %1
+    ; RV32I-NEXT: [[PseudoVMSLTU_VV_M4_:%[0-9]+]]:vmm4 = PseudoVMSLTU_VV_M4 [[DEF]], [[DEF]], -1, 6 /* e64 */
+    ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M4_]]
     ; RV32I-NEXT: PseudoRET implicit $v8
     ;
     ; RV64I-LABEL: name: icmp_nxv4i64
     ; RV64I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
-    ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M4 [[DEF]], [[DEF]], -1, 6 /* e64 */
-    ; RV64I-NEXT: $v8 = COPY %1
+    ; RV64I-NEXT: [[PseudoVMSLTU_VV_M4_:%[0-9]+]]:vmm4 = PseudoVMSLTU_VV_M4 [[DEF]], [[DEF]], -1, 6 /* e64 */
+    ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M4_]]
     ; RV64I-NEXT: PseudoRET implicit $v8
     %0:vrb(<vscale x 4 x s64>) = G_IMPLICIT_DEF
     %1:vrb(<vscale x 4 x s1>) = G_ICMP intpred(ult), %0(<vscale x 4 x s64>), %0
@@ -517,14 +517,14 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: icmp_nxv8i64
     ; RV32I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
-    ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M8 [[DEF]], [[DEF]], -1, 6 /* e64 */
-    ; RV32I-NEXT: $v8 = COPY %1
+    ; RV32I-NEXT: [[PseudoVMSLTU_VV_M8_:%[0-9]+]]:vmm8 = PseudoVMSLTU_VV_M8 [[DEF]], [[DEF]], -1, 6 /* e64 */
+    ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M8_]]
     ; RV32I-NEXT: PseudoRET implicit $v8
     ;
     ; RV64I-LABEL: name: icmp_nxv8i64
     ; RV64I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
-    ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M8 [[DEF]], [[DEF]], -1, 6 /* e64 */
-    ; RV64I-NEXT: $v8 = COPY %1
+    ; RV64I-NEXT: [[PseudoVMSLTU_VV_M8_:%[0-9]+]]:vmm8 = PseudoVMSLTU_VV_M8 [[DEF]], [[DEF]], -1, 6 /* e64 */
+    ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M8_]]
     ; RV64I-NEXT: PseudoRET implicit $v8
     %0:vrb(<vscale x 8 x s64>) = G_IMPLICIT_DEF
     %1:vrb(<vscale x 8 x s1>) = G_ICMP intpred(ult), %0(<vscale x 8 x s64>), %0
diff --git a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll
index 139579b3d2a361..9822e0df63fdbf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll
@@ -112,9 +112,9 @@ define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) {
 ; CHECK-NEXT:    vmsltu.vx v0, v16, a2
 ; CHECK-NEXT:    vsext.vf8 v16, v8
 ; CHECK-NEXT:    vsaddu.vx v8, v16, a1
-; CHECK-NEXT:    vmsltu.vx v16, v8, a2
+; CHECK-NEXT:    vmsltu.vx v8, v8, a2
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vslideup.vi v0, v16, 2
+; CHECK-NEXT:    vslideup.vi v0, v8, 2
 ; CHECK-NEXT:    ret
   %mask = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 %index, i64 %tc)
   ret <32 x i1> %mask
@@ -132,27 +132,27 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) {
 ; CHECK-NEXT:    vmsltu.vx v0, v16, a2
 ; CHECK-NEXT:    vsext.vf8 v16, v8
 ; CHECK-NEXT:    vsaddu.vx v8, v16, a1
-; CHECK-NEXT:    vmsltu.vx v16, v8, a2
+; CHECK-NEXT:    vmsltu.vx v8, v8, a2
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v0, v16, 2
+; CHECK-NEXT:    vslideup.vi v0, v8, 2
 ; CHECK-NEXT:    lui a0, %hi(.LCPI9_1)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_1)
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    vsext.vf8 v16, v8
 ; CHECK-NEXT:    vsaddu.vx v8, v16, a1
-; CHECK-NEXT:    vmsltu.vx v16, v8, a2
+; CHECK-NEXT:    vmsltu.vx v8, v8, a2
 ; CHECK-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v0, v16, 4
+; CHECK-NEXT:    vslideup.vi v0, v8, 4
 ; CHECK-NEXT:    lui a0, %hi(.LCPI9_2)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_2)
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    vsext.vf8 v16, v8
 ; CHECK-NEXT:    vsaddu.vx v8, v16, a1
-; CHECK-NEXT:    vmsltu.vx v16, v8, a2
+; CHECK-NEXT:    vmsltu.vx v8, v8, a2
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vi v0, v16, 6
+; CHECK-NEXT:    vslideup.vi v0, v8, 6
 ; CHECK-NEXT:    ret
   %mask = call <64 x i1> @llvm.get.active.lane.mask.v64i1.i64(i64 %index, i64 %tc)
   ret <64 x i1> %mask
@@ -170,30 +170,30 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
 ; CHECK-NEXT:    vle8.v v9, (a0)
 ; CHECK-NEXT:    vsext.vf8 v16, v8
 ; CHECK-NEXT:    vsaddu.vx v16, v16, a1
-; CHECK-NEXT:    vmsltu.vx v10, v16, a2
+; CHECK-NEXT:    vmsltu.vx v0, v16, a2
 ; CHECK-NEXT:    vsext.vf8 v16, v9
-; CHECK-NEXT:    vsaddu.vx v16, v16, a1
-; CHECK-NEXT:    vmsltu.vx v8, v16, a2
+; CHECK-NEXT:    vsaddu.vx v8, v16, a1
+; CHECK-NEXT:    vmsltu.vx v8, v8, a2
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v8, v10, 2
+; CHECK-NEXT:    vslideup.vi v8, v0, 2
 ; CHECK-NEXT:    lui a0, %hi(.LCPI10_2)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_2)
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v9, (a0)
 ; CHECK-NEXT:    vsext.vf8 v16, v9
 ; CHECK-NEXT:    vsaddu.vx v16, v16, a1
-; CHECK-NEXT:    vmsltu.vx v9, v16, a2
+; CHECK-NEXT:    vmsltu.vx v0, v16, a2
 ; CHECK-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v8, v9, 4
+; CHECK-NEXT:    vslideup.vi v8, v0, 4
 ; CHECK-NEXT:    lui a0, %hi(.LCPI10_3)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_3)
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v9, (a0)
 ; CHECK-NEXT:    vsext.vf8 v16, v9
 ; CHECK-NEXT:    vsaddu.vx v16, v16, a1
-; CHECK-NEXT:    vmsltu.vx v9, v16, a2
+; CHECK-NEXT:    vmsltu.vx v0, v16, a2
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vi v8, v9, 6
+; CHECK-NEXT:    vslideup.vi v8, v0, 6
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    lui a0, %hi(.LCPI10_4)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_4)
@@ -203,27 +203,27 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
 ; CHECK-NEXT:    vmsltu.vx v0, v16, a2
 ; CHECK-NEXT:    vsext.vf8 v16, v9
 ; CHECK-NEXT:    vsaddu.vx v16, v16, a1
-; CHECK-NEXT:    vmsltu.vx v9, v16, a2
+; CHECK-NEXT:    vmsltu.vx v16, v16, a2
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v0, v9, 2
+; CHECK-NEXT:    vslideup.vi v0, v16, 2
 ; CHECK-NEXT:    lui a0, %hi(.LCPI10_5)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_5)
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v9, (a0)
 ; CHECK-NEXT:    vsext.vf8 v16, v9
 ; CHECK-NEXT:    vsaddu.vx v16, v16, a1
-; CHECK-NEXT:    vmsltu.vx v9, v16, a2
+; CHECK-NEXT:    vmsltu.vx v16, v16, a2
 ; CHECK-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v0, v9, 4
+; CHECK-NEXT:    vslideup.vi v0, v16, 4
 ; CHECK-NEXT:    lui a0, %hi(.LCPI10_6)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_6)
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v9, (a0)
 ; CHECK-NEXT:    vsext.vf8 v16, v9
 ; CHECK-NEXT:    vsaddu.vx v16, v16, a1
-; CHECK-NEXT:    vmsltu.vx v9, v16, a2
+; CHECK-NEXT:    vmsltu.vx v16, v16, a2
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vi v0, v9, 6
+; CHECK-NEXT:    vslideup.vi v0, v16, 6
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT:    vslideup.vi v0, v8, 8
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll b/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll
index 6875925adad834..53d1ae57fd7d3f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll
@@ -88,11 +88,11 @@ define <vscale x 16 x i1> @nxv16i1(i1 %x, i1 %y) {
 ; CHECK-NEXT:    andi a0, a0, 1
 ; CHECK-NEXT:    vsetvli a2, zero, e8, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vmsne.vi v10, v8, 0
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    andi a1, a1, 1
 ; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vmsne.vi v11, v8, 0
-; CHECK-NEXT:    vmxor.mm v0, v10, v11
+; CHECK-NEXT:    vmsne.vi v2, v8, 0
+; CHECK-NEXT:    vmxor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head.x = insertelement <vscale x 16 x i1> poison, i1 %x, i32 0
   %splat.x = shufflevector <vscale x 16 x i1> %head.x, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
@@ -108,11 +108,11 @@ define <vscale x 32 x i1> @nxv32i1(i1 %x, i1 %y) {
 ; CHECK-NEXT:    andi a0, a0, 1
 ; CHECK-NEXT:    vsetvli a2, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vmsne.vi v12, v8, 0
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    andi a1, a1, 1
 ; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vmsne.vi v13, v8, 0
-; CHECK-NEXT:    vmxor.mm v0, v12, v13
+; CHECK-NEXT:    vmsne.vi v4, v8, 0
+; CHECK-NEXT:    vmxor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head.x = insertelement <vscale x 32 x i1> poison, i1 %x, i32 0
   %splat.x = shufflevector <vscale x 32 x i1> %head.x, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
@@ -128,11 +128,11 @@ define <vscale x 64 x i1> @nxv64i1(i1 %x, i1 %y) {
 ; CHECK-NEXT:    andi a0, a0, 1
 ; CHECK-NEXT:    vsetvli a2, zero, e8, m8, ta, ma
 ; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vmsne.vi v16, v8, 0
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    andi a1, a1, 1
 ; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vmsne.vi v17, v8, 0
-; CHECK-NEXT:    vmxor.mm v0, v16, v17
+; CHECK-NEXT:    vmsne.vi v8, v8, 0
+; CHECK-NEXT:    vmxor.mm v0, v0, v8
 ; CHECK-NEXT:    ret
   %head.x = insertelement <vscale x 64 x i1> poison, i1 %x, i32 0
   %splat.x = shufflevector <vscale x 64 x i1> %head.x, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
index 879dff4a6e490b..d063d0565f1ea8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
@@ -2422,8 +2422,8 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale
 ; RV64-NEXT:    vsll.vx v24, v8, a2, v0.t
 ; RV64-NEXT:    lui a3, 16
 ; RV64-NEXT:    addiw a3, a3, -256
-; RV64-NEXT:    li a4, 40
 ; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
+; RV64-NEXT:    li a4, 40
 ; RV64-NEXT:    vsll.vx v16, v16, a4, v0.t
 ; RV64-NEXT:    vor.vv v16, v24, v16, v0.t
 ; RV64-NEXT:    addi a5, sp, 16
@@ -2780,8 +2780,8 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale
 ; RV64-NEXT:    vsll.vx v24, v8, a2, v0.t
 ; RV64-NEXT:    lui a3, 16
 ; RV64-NEXT:    addiw a3, a3, -256
-; RV64-NEXT:    li a4, 40
 ; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
+; RV64-NEXT:    li a4, 40
 ; RV64-NEXT:    vsll.vx v16, v16, a4, v0.t
 ; RV64-NEXT:    vor.vv v16, v24, v16, v0.t
 ; RV64-NEXT:    addi a5, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
index 800d06c5a78f54..2e78d9f0d37903 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
@@ -1126,8 +1126,8 @@ define <vscale x 7 x i64> @vp_bswap_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7
 ; RV64-NEXT:    vsll.vx v24, v8, a2, v0.t
 ; RV64-NEXT:    lui a3, 16
 ; RV64-NEXT:    addiw a3, a3, -256
-; RV64-NEXT:    li a4, 40
 ; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
+; RV64-NEXT:    li a4, 40
 ; RV64-NEXT:    vsll.vx v16, v16, a4, v0.t
 ; RV64-NEXT:    vor.vv v16, v24, v16, v0.t
 ; RV64-NEXT:    addi a5, sp, 16
@@ -1369,8 +1369,8 @@ define <vscale x 8 x i64> @vp_bswap_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
 ; RV64-NEXT:    vsll.vx v24, v8, a2, v0.t
 ; RV64-NEXT:    lui a3, 16
 ; RV64-NEXT:    addiw a3, a3, -256
-; RV64-NEXT:    li a4, 40
 ; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
+; RV64-NEXT:    li a4, 40
 ; RV64-NEXT:    vsll.vx v16, v16, a4, v0.t
 ; RV64-NEXT:    vor.vv v16, v24, v16, v0.t
 ; RV64-NEXT:    addi a5, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
index 5b271606f08abe..6dcc3b8e3e54a4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
@@ -135,21 +135,19 @@ declare <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half>, <vscale x
 define <vscale x 8 x half> @vp_ceil_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv8f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI6_0)
 ; CHECK-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x half> %v
@@ -179,21 +177,19 @@ declare <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half>, <vscal
 define <vscale x 16 x half> @vp_ceil_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv16f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI8_0)
 ; CHECK-NEXT:    flh fa5, %lo(.LCPI8_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x half> %v
@@ -223,21 +219,19 @@ declare <vscale x 32 x half> @llvm.vp.ceil.nxv32f16(<vscale x 32 x half>, <vscal
 define <vscale x 32 x half> @vp_ceil_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv32f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI10_0)
 ; CHECK-NEXT:    flh fa5, %lo(.LCPI10_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 32 x half> @llvm.vp.ceil.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
   ret <vscale x 32 x half> %v
@@ -351,21 +345,19 @@ declare <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float>, <vscale
 define <vscale x 4 x float> @vp_ceil_vv_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x float> %v
@@ -395,21 +387,19 @@ declare <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float>, <vscale
 define <vscale x 8 x float> @vp_ceil_vv_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x float> %v
@@ -439,21 +429,19 @@ declare <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float>, <vsc
 define <vscale x 16 x float> @vp_ceil_vv_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x float> %v
@@ -525,21 +513,19 @@ declare <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double>, <vsca
 define <vscale x 2 x double> @vp_ceil_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -569,21 +555,19 @@ declare <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double>, <vsca
 define <vscale x 4 x double> @vp_ceil_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
@@ -613,21 +597,19 @@ declare <vscale x 7 x double> @llvm.vp.ceil.nxv7f64(<vscale x 7 x double>, <vsca
 define <vscale x 7 x double> @vp_ceil_vv_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv7f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI28_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI28_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.ceil.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
@@ -657,21 +639,19 @@ declare <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double>, <vsca
 define <vscale x 8 x double> @vp_ceil_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI30_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI30_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
@@ -714,7 +694,7 @@ define <vscale x 16 x double> @vp_ceil_vv_nxv16f64(<vscale x 16 x double> %va, <
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    srli a2, a1, 3
 ; CHECK-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vslidedown.vx v25, v0, a2
+; CHECK-NEXT:    vslidedown.vx v0, v0, a2
 ; CHECK-NEXT:    sub a2, a0, a1
 ; CHECK-NEXT:    sltu a3, a0, a2
 ; CHECK-NEXT:    addi a3, a3, -1
@@ -722,13 +702,11 @@ define <vscale x 16 x double> @vp_ceil_vv_nxv16f64(<vscale x 16 x double> %va, <
 ; CHECK-NEXT:    lui a3, %hi(.LCPI32_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI32_0)(a3)
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfabs.v v8, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v8, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a2, 3
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
 ; CHECK-NEXT:    fsrm a2
 ; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
index e69b4789a09afe..b4ab11cd17175c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
@@ -145,12 +145,12 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind {
 ; RV32-NEXT:    addi a3, sp, 64
 ; RV32-NEXT:    slli a2, a2, 3
 ; RV32-NEXT:    add a4, a0, a2
-; RV32-NEXT:    vl8r.v v16, (a4)
-; RV32-NEXT:    vl8r.v v24, (a0)
+; RV32-NEXT:    vl8r.v v8, (a4)
+; RV32-NEXT:    vl8r.v v16, (a0)
 ; RV32-NEXT:    add a1, a3, a1
 ; RV32-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
-; RV32-NEXT:    vmseq.vi v8, v16, 0
-; RV32-NEXT:    vmseq.vi v0, v24, 0
+; RV32-NEXT:    vmseq.vi v8, v8, 0
+; RV32-NEXT:    vmseq.vi v0, v16, 0
 ; RV32-NEXT:    vmv.v.i v16, 0
 ; RV32-NEXT:    vmerge.vim v24, v16, 1, v0
 ; RV32-NEXT:    vs8r.v v24, (a3)
@@ -185,12 +185,12 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind {
 ; RV64-NEXT:    addi a3, sp, 64
 ; RV64-NEXT:    slli a2, a2, 3
 ; RV64-NEXT:    add a4, a0, a2
-; RV64-NEXT:    vl8r.v v16, (a4)
-; RV64-NEXT:    vl8r.v v24, (a0)
+; RV64-NEXT:    vl8r.v v8, (a4)
+; RV64-NEXT:    vl8r.v v16, (a0)
 ; RV64-NEXT:    add a1, a3, a1
 ; RV64-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
-; RV64-NEXT:    vmseq.vi v8, v16, 0
-; RV64-NEXT:    vmseq.vi v0, v24, 0
+; RV64-NEXT:    vmseq.vi v8, v8, 0
+; RV64-NEXT:    vmseq.vi v0, v16, 0
 ; RV64-NEXT:    vmv.v.i v16, 0
 ; RV64-NEXT:    vmerge.vim v24, v16, 1, v0
 ; RV64-NEXT:    vs8r.v v24, (a3)
@@ -275,8 +275,8 @@ define i1 @extractelt_nxv16i1_idx0(ptr %x) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl2r.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vmseq.vi v10, v8, 0
-; CHECK-NEXT:    vfirst.m a0, v10
+; CHECK-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v0
 ; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <vscale x 16 x i8>, ptr %x
@@ -290,8 +290,8 @@ define i1 @extractelt_nxv32i1_idx0(ptr %x) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4r.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vmseq.vi v12, v8, 0
-; CHECK-NEXT:    vfirst.m a0, v12
+; CHECK-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v0
 ; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <vscale x 32 x i8>, ptr %x
@@ -305,8 +305,8 @@ define i1 @extractelt_nxv64i1_idx0(ptr %x) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl8r.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
-; CHECK-NEXT:    vmseq.vi v16, v8, 0
-; CHECK-NEXT:    vfirst.m a0, v16
+; CHECK-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v0
 ; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <vscale x 64 x i8>, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll
index ee8c322961c7bd..1d07ac83f51f63 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll
@@ -125,11 +125,11 @@ define <32 x i1> @v32i1(i1 %x, i1 %y) {
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vmsne.vi v10, v8, 0
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    andi a1, a1, 1
 ; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vmsne.vi v11, v8, 0
-; CHECK-NEXT:    vmxor.mm v0, v10, v11
+; CHECK-NEXT:    vmsne.vi v2, v8, 0
+; CHECK-NEXT:    vmxor.mm v0, v0, v2
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
 ; CHECK-NEXT:    vrgather.vi v10, v8, 0
@@ -150,11 +150,11 @@ define <64 x i1> @v64i1(i1 %x, i1 %y) {
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vmsne.vi v12, v8, 0
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    andi a1, a1, 1
 ; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vmsne.vi v13, v8, 0
-; CHECK-NEXT:    vmxor.mm v0, v12, v13
+; CHECK-NEXT:    vmsne.vi v4, v8, 0
+; CHECK-NEXT:    vmxor.mm v0, v0, v4
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
 ; CHECK-NEXT:    vrgather.vi v12, v8, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
index 943fc58d637a00..95a9777671ff86 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
@@ -1793,8 +1793,8 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
 ; RV64-NEXT:    vsll.vx v24, v8, a2, v0.t
 ; RV64-NEXT:    lui a3, 16
 ; RV64-NEXT:    addiw a3, a3, -256
-; RV64-NEXT:    li a4, 40
 ; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
+; RV64-NEXT:    li a4, 40
 ; RV64-NEXT:    vsll.vx v16, v16, a4, v0.t
 ; RV64-NEXT:    vor.vv v16, v24, v16, v0.t
 ; RV64-NEXT:    addi a5, sp, 16
@@ -2157,8 +2157,8 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex
 ; RV64-NEXT:    vsll.vx v24, v8, a2, v0.t
 ; RV64-NEXT:    lui a3, 16
 ; RV64-NEXT:    addiw a3, a3, -256
-; RV64-NEXT:    li a4, 40
 ; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
+; RV64-NEXT:    li a4, 40
 ; RV64-NEXT:    vsll.vx v16, v16, a4, v0.t
 ; RV64-NEXT:    vor.vv v16, v24, v16, v0.t
 ; RV64-NEXT:    addi a5, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
index f80d4e5c0d7c3f..b772fc23dca030 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
@@ -860,8 +860,8 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev
 ; RV64-NEXT:    vsll.vx v24, v8, a2, v0.t
 ; RV64-NEXT:    lui a3, 16
 ; RV64-NEXT:    addiw a3, a3, -256
-; RV64-NEXT:    li a4, 40
 ; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
+; RV64-NEXT:    li a4, 40
 ; RV64-NEXT:    vsll.vx v16, v16, a4, v0.t
 ; RV64-NEXT:    vor.vv v16, v24, v16, v0.t
 ; RV64-NEXT:    addi a5, sp, 16
@@ -1091,8 +1091,8 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev
 ; RV64-NEXT:    vsll.vx v24, v8, a2, v0.t
 ; RV64-NEXT:    lui a3, 16
 ; RV64-NEXT:    addiw a3, a3, -256
-; RV64-NEXT:    li a4, 40
 ; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
+; RV64-NEXT:    li a4, 40
 ; RV64-NEXT:    vsll.vx v16, v16, a4, v0.t
 ; RV64-NEXT:    vor.vv v16, v24, v16, v0.t
 ; RV64-NEXT:    addi a5, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
index 5d024f140fd5d6..68b479e3b9b8c8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
@@ -194,23 +194,21 @@ define <8 x half> @vp_ceil_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
 ;
 ; ZVFHMIN-LABEL: vp_ceil_v8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v9, v0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v9, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a0, 3
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -261,42 +259,38 @@ declare <16 x half> @llvm.vp.ceil.v16f16(<16 x half>, <16 x i1>, i32)
 define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_ceil_v16f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vmv1r.v v10, v0
 ; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
 ; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    fsrmi a0, 3
-; ZVFH-NEXT:    vmv1r.v v0, v10
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_ceil_v16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v10, v0
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v10, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a0, 3
-; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -431,21 +425,19 @@ declare <8 x float> @llvm.vp.ceil.v8f32(<8 x float>, <8 x i1>, i32)
 define <8 x float> @vp_ceil_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_v8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
   ret <8 x float> %v
@@ -475,21 +467,19 @@ declare <16 x float> @llvm.vp.ceil.v16f32(<16 x float>, <16 x i1>, i32)
 define <16 x float> @vp_ceil_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_v16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
   ret <16 x float> %v
@@ -561,21 +551,19 @@ declare <4 x double> @llvm.vp.ceil.v4f64(<4 x double>, <4 x i1>, i32)
 define <4 x double> @vp_ceil_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI18_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -605,21 +593,19 @@ declare <8 x double> @llvm.vp.ceil.v8f64(<8 x double>, <8 x i1>, i32)
 define <8 x double> @vp_ceil_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_v8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI20_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
@@ -649,21 +635,19 @@ declare <15 x double> @llvm.vp.ceil.v15f64(<15 x double>, <15 x i1>, i32)
 define <15 x double> @vp_ceil_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_v15f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <15 x double> @llvm.vp.ceil.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
@@ -693,21 +677,19 @@ declare <16 x double> @llvm.vp.ceil.v16f64(<16 x double>, <16 x i1>, i32)
 define <16 x double> @vp_ceil_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_v16f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
@@ -743,7 +725,6 @@ define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex
 ; CHECK-NEXT:    slli a1, a1, 4
 ; CHECK-NEXT:    sub sp, sp, a1
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT:    vmv1r.v v25, v0
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    add a1, sp, a1
@@ -760,13 +741,11 @@ define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex
 ; CHECK-NEXT:    lui a2, %hi(.LCPI26_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a2)
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a1, 3
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a1
 ; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
index dccb62877af3cc..88a7878e8532b9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
@@ -154,9 +154,9 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind {
 ; RV32-NEXT:    li a2, 32
 ; RV32-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; RV32-NEXT:    vle8.v v8, (a0)
-; RV32-NEXT:    vmseq.vi v10, v8, 0
+; RV32-NEXT:    vmseq.vi v0, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.x.s a0, v10
+; RV32-NEXT:    vmv.x.s a0, v0
 ; RV32-NEXT:    srl a0, a0, a1
 ; RV32-NEXT:    andi a0, a0, 1
 ; RV32-NEXT:    ret
@@ -166,9 +166,9 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind {
 ; RV64-NEXT:    li a2, 32
 ; RV64-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; RV64-NEXT:    vle8.v v8, (a0)
-; RV64-NEXT:    vmseq.vi v10, v8, 0
+; RV64-NEXT:    vmseq.vi v0, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; RV64-NEXT:    vmv.x.s a0, v10
+; RV64-NEXT:    vmv.x.s a0, v0
 ; RV64-NEXT:    srl a0, a0, a1
 ; RV64-NEXT:    andi a0, a0, 1
 ; RV64-NEXT:    ret
@@ -178,9 +178,9 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind {
 ; RV32ZBS-NEXT:    li a2, 32
 ; RV32ZBS-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; RV32ZBS-NEXT:    vle8.v v8, (a0)
-; RV32ZBS-NEXT:    vmseq.vi v10, v8, 0
+; RV32ZBS-NEXT:    vmseq.vi v0, v8, 0
 ; RV32ZBS-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; RV32ZBS-NEXT:    vmv.x.s a0, v10
+; RV32ZBS-NEXT:    vmv.x.s a0, v0
 ; RV32ZBS-NEXT:    bext a0, a0, a1
 ; RV32ZBS-NEXT:    ret
 ;
@@ -189,9 +189,9 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind {
 ; RV64ZBS-NEXT:    li a2, 32
 ; RV64ZBS-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; RV64ZBS-NEXT:    vle8.v v8, (a0)
-; RV64ZBS-NEXT:    vmseq.vi v10, v8, 0
+; RV64ZBS-NEXT:    vmseq.vi v0, v8, 0
 ; RV64ZBS-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; RV64ZBS-NEXT:    vmv.x.s a0, v10
+; RV64ZBS-NEXT:    vmv.x.s a0, v0
 ; RV64ZBS-NEXT:    bext a0, a0, a1
 ; RV64ZBS-NEXT:    ret
   %a = load <32 x i8>, ptr %x
@@ -206,10 +206,10 @@ define i1 @extractelt_v64i1(ptr %x, i64 %idx) nounwind {
 ; RV32-NEXT:    li a2, 64
 ; RV32-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; RV32-NEXT:    vle8.v v8, (a0)
-; RV32-NEXT:    vmseq.vi v12, v8, 0
+; RV32-NEXT:    vmseq.vi v0, v8, 0
 ; RV32-NEXT:    srli a0, a1, 5
 ; RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT:    vslidedown.vx v8, v12, a0
+; RV32-NEXT:    vslidedown.vx v8, v0, a0
 ; RV32-NEXT:    vmv.x.s a0, v8
 ; RV32-NEXT:    srl a0, a0, a1
 ; RV32-NEXT:    andi a0, a0, 1
@@ -220,9 +220,9 @@ define i1 @extractelt_v64i1(ptr %x, i64 %idx) nounwind {
 ; RV64-NEXT:    li a2, 64
 ; RV64-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; RV64-NEXT:    vle8.v v8, (a0)
-; RV64-NEXT:    vmseq.vi v12, v8, 0
+; RV64-NEXT:    vmseq.vi v0, v8, 0
 ; RV64-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
-; RV64-NEXT:    vmv.x.s a0, v12
+; RV64-NEXT:    vmv.x.s a0, v0
 ; RV64-NEXT:    srl a0, a0, a1
 ; RV64-NEXT:    andi a0, a0, 1
 ; RV64-NEXT:    ret
@@ -232,10 +232,10 @@ define i1 @extractelt_v64i1(ptr %x, i64 %idx) nounwind {
 ; RV32ZBS-NEXT:    li a2, 64
 ; RV32ZBS-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; RV32ZBS-NEXT:    vle8.v v8, (a0)
-; RV32ZBS-NEXT:    vmseq.vi v12, v8, 0
+; RV32ZBS-NEXT:    vmseq.vi v0, v8, 0
 ; RV32ZBS-NEXT:    srli a0, a1, 5
 ; RV32ZBS-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; RV32ZBS-NEXT:    vslidedown.vx v8, v12, a0
+; RV32ZBS-NEXT:    vslidedown.vx v8, v0, a0
 ; RV32ZBS-NEXT:    vmv.x.s a0, v8
 ; RV32ZBS-NEXT:    bext a0, a0, a1
 ; RV32ZBS-NEXT:    ret
@@ -245,9 +245,9 @@ define i1 @extractelt_v64i1(ptr %x, i64 %idx) nounwind {
 ; RV64ZBS-NEXT:    li a2, 64
 ; RV64ZBS-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; RV64ZBS-NEXT:    vle8.v v8, (a0)
-; RV64ZBS-NEXT:    vmseq.vi v12, v8, 0
+; RV64ZBS-NEXT:    vmseq.vi v0, v8, 0
 ; RV64ZBS-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
-; RV64ZBS-NEXT:    vmv.x.s a0, v12
+; RV64ZBS-NEXT:    vmv.x.s a0, v0
 ; RV64ZBS-NEXT:    bext a0, a0, a1
 ; RV64ZBS-NEXT:    ret
   %a = load <64 x i8>, ptr %x
@@ -262,10 +262,10 @@ define i1 @extractelt_v128i1(ptr %x, i64 %idx) nounwind {
 ; RV32-NEXT:    li a2, 128
 ; RV32-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; RV32-NEXT:    vle8.v v8, (a0)
-; RV32-NEXT:    vmseq.vi v16, v8, 0
+; RV32-NEXT:    vmseq.vi v0, v8, 0
 ; RV32-NEXT:    srli a0, a1, 5
 ; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vx v8, v16, a0
+; RV32-NEXT:    vslidedown.vx v8, v0, a0
 ; RV32-NEXT:    vmv.x.s a0, v8
 ; RV32-NEXT:    srl a0, a0, a1
 ; RV32-NEXT:    andi a0, a0, 1
@@ -276,10 +276,10 @@ define i1 @extractelt_v128i1(ptr %x, i64 %idx) nounwind {
 ; RV64-NEXT:    li a2, 128
 ; RV64-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; RV64-NEXT:    vle8.v v8, (a0)
-; RV64-NEXT:    vmseq.vi v16, v8, 0
+; RV64-NEXT:    vmseq.vi v0, v8, 0
 ; RV64-NEXT:    srli a0, a1, 6
 ; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vslidedown.vx v8, v16, a0
+; RV64-NEXT:    vslidedown.vx v8, v0, a0
 ; RV64-NEXT:    vmv.x.s a0, v8
 ; RV64-NEXT:    srl a0, a0, a1
 ; RV64-NEXT:    andi a0, a0, 1
@@ -290,10 +290,10 @@ define i1 @extractelt_v128i1(ptr %x, i64 %idx) nounwind {
 ; RV32ZBS-NEXT:    li a2, 128
 ; RV32ZBS-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; RV32ZBS-NEXT:    vle8.v v8, (a0)
-; RV32ZBS-NEXT:    vmseq.vi v16, v8, 0
+; RV32ZBS-NEXT:    vmseq.vi v0, v8, 0
 ; RV32ZBS-NEXT:    srli a0, a1, 5
 ; RV32ZBS-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZBS-NEXT:    vslidedown.vx v8, v16, a0
+; RV32ZBS-NEXT:    vslidedown.vx v8, v0, a0
 ; RV32ZBS-NEXT:    vmv.x.s a0, v8
 ; RV32ZBS-NEXT:    bext a0, a0, a1
 ; RV32ZBS-NEXT:    ret
@@ -303,10 +303,10 @@ define i1 @extractelt_v128i1(ptr %x, i64 %idx) nounwind {
 ; RV64ZBS-NEXT:    li a2, 128
 ; RV64ZBS-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; RV64ZBS-NEXT:    vle8.v v8, (a0)
-; RV64ZBS-NEXT:    vmseq.vi v16, v8, 0
+; RV64ZBS-NEXT:    vmseq.vi v0, v8, 0
 ; RV64ZBS-NEXT:    srli a0, a1, 6
 ; RV64ZBS-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64ZBS-NEXT:    vslidedown.vx v8, v16, a0
+; RV64ZBS-NEXT:    vslidedown.vx v8, v0, a0
 ; RV64ZBS-NEXT:    vmv.x.s a0, v8
 ; RV64ZBS-NEXT:    bext a0, a0, a1
 ; RV64ZBS-NEXT:    ret
@@ -328,12 +328,12 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
 ; RV32-NEXT:    li a2, 128
 ; RV32-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; RV32-NEXT:    addi a2, a0, 128
-; RV32-NEXT:    vle8.v v16, (a2)
-; RV32-NEXT:    vle8.v v24, (a0)
+; RV32-NEXT:    vle8.v v8, (a2)
+; RV32-NEXT:    vle8.v v16, (a0)
 ; RV32-NEXT:    mv a0, sp
 ; RV32-NEXT:    add a1, a0, a1
-; RV32-NEXT:    vmseq.vi v8, v16, 0
-; RV32-NEXT:    vmseq.vi v0, v24, 0
+; RV32-NEXT:    vmseq.vi v8, v8, 0
+; RV32-NEXT:    vmseq.vi v0, v16, 0
 ; RV32-NEXT:    vmv.v.i v16, 0
 ; RV32-NEXT:    vmerge.vim v24, v16, 1, v0
 ; RV32-NEXT:    vse8.v v24, (a0)
@@ -359,12 +359,12 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
 ; RV64-NEXT:    li a2, 128
 ; RV64-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; RV64-NEXT:    addi a2, a0, 128
-; RV64-NEXT:    vle8.v v16, (a2)
-; RV64-NEXT:    vle8.v v24, (a0)
+; RV64-NEXT:    vle8.v v8, (a2)
+; RV64-NEXT:    vle8.v v16, (a0)
 ; RV64-NEXT:    mv a0, sp
 ; RV64-NEXT:    add a1, a0, a1
-; RV64-NEXT:    vmseq.vi v8, v16, 0
-; RV64-NEXT:    vmseq.vi v0, v24, 0
+; RV64-NEXT:    vmseq.vi v8, v8, 0
+; RV64-NEXT:    vmseq.vi v0, v16, 0
 ; RV64-NEXT:    vmv.v.i v16, 0
 ; RV64-NEXT:    vmerge.vim v24, v16, 1, v0
 ; RV64-NEXT:    vse8.v v24, (a0)
@@ -390,12 +390,12 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
 ; RV32ZBS-NEXT:    li a2, 128
 ; RV32ZBS-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; RV32ZBS-NEXT:    addi a2, a0, 128
-; RV32ZBS-NEXT:    vle8.v v16, (a2)
-; RV32ZBS-NEXT:    vle8.v v24, (a0)
+; RV32ZBS-NEXT:    vle8.v v8, (a2)
+; RV32ZBS-NEXT:    vle8.v v16, (a0)
 ; RV32ZBS-NEXT:    mv a0, sp
 ; RV32ZBS-NEXT:    add a1, a0, a1
-; RV32ZBS-NEXT:    vmseq.vi v8, v16, 0
-; RV32ZBS-NEXT:    vmseq.vi v0, v24, 0
+; RV32ZBS-NEXT:    vmseq.vi v8, v8, 0
+; RV32ZBS-NEXT:    vmseq.vi v0, v16, 0
 ; RV32ZBS-NEXT:    vmv.v.i v16, 0
 ; RV32ZBS-NEXT:    vmerge.vim v24, v16, 1, v0
 ; RV32ZBS-NEXT:    vse8.v v24, (a0)
@@ -421,12 +421,12 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
 ; RV64ZBS-NEXT:    li a2, 128
 ; RV64ZBS-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; RV64ZBS-NEXT:    addi a2, a0, 128
-; RV64ZBS-NEXT:    vle8.v v16, (a2)
-; RV64ZBS-NEXT:    vle8.v v24, (a0)
+; RV64ZBS-NEXT:    vle8.v v8, (a2)
+; RV64ZBS-NEXT:    vle8.v v16, (a0)
 ; RV64ZBS-NEXT:    mv a0, sp
 ; RV64ZBS-NEXT:    add a1, a0, a1
-; RV64ZBS-NEXT:    vmseq.vi v8, v16, 0
-; RV64ZBS-NEXT:    vmseq.vi v0, v24, 0
+; RV64ZBS-NEXT:    vmseq.vi v8, v8, 0
+; RV64ZBS-NEXT:    vmseq.vi v0, v16, 0
 ; RV64ZBS-NEXT:    vmv.v.i v16, 0
 ; RV64ZBS-NEXT:    vmerge.vim v24, v16, 1, v0
 ; RV64ZBS-NEXT:    vse8.v v24, (a0)
@@ -527,8 +527,8 @@ define i1 @extractelt_v32i1_idx0(ptr %x) nounwind {
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmseq.vi v10, v8, 0
-; CHECK-NEXT:    vfirst.m a0, v10
+; CHECK-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v0
 ; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
@@ -543,8 +543,8 @@ define i1 @extractelt_v64i1_idx0(ptr %x) nounwind {
 ; CHECK-NEXT:    li a1, 64
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmseq.vi v12, v8, 0
-; CHECK-NEXT:    vfirst.m a0, v12
+; CHECK-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v0
 ; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
@@ -559,8 +559,8 @@ define i1 @extractelt_v128i1_idx0(ptr %x) nounwind {
 ; CHECK-NEXT:    li a1, 128
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmseq.vi v16, v8, 0
-; CHECK-NEXT:    vfirst.m a0, v16
+; CHECK-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v0
 ; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <128 x i8>, ptr %x
@@ -575,8 +575,8 @@ define i1 @extractelt_v256i1_idx0(ptr %x) nounwind {
 ; CHECK-NEXT:    li a1, 128
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmseq.vi v16, v8, 0
-; CHECK-NEXT:    vfirst.m a0, v16
+; CHECK-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v0
 ; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <256 x i8>, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
index 6c2be509f7c22e..bd5833aa082c84 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
@@ -194,23 +194,21 @@ define <8 x half> @vp_floor_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
 ;
 ; ZVFHMIN-LABEL: vp_floor_v8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v9, v0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v9, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a0, 2
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -261,42 +259,38 @@ declare <16 x half> @llvm.vp.floor.v16f16(<16 x half>, <16 x i1>, i32)
 define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_floor_v16f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vmv1r.v v10, v0
 ; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
 ; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    fsrmi a0, 2
-; ZVFH-NEXT:    vmv1r.v v0, v10
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_floor_v16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v10, v0
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v10, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a0, 2
-; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -431,21 +425,19 @@ declare <8 x float> @llvm.vp.floor.v8f32(<8 x float>, <8 x i1>, i32)
 define <8 x float> @vp_floor_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_v8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.vp.floor.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
   ret <8 x float> %v
@@ -475,21 +467,19 @@ declare <16 x float> @llvm.vp.floor.v16f32(<16 x float>, <16 x i1>, i32)
 define <16 x float> @vp_floor_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_v16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.vp.floor.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
   ret <16 x float> %v
@@ -561,21 +551,19 @@ declare <4 x double> @llvm.vp.floor.v4f64(<4 x double>, <4 x i1>, i32)
 define <4 x double> @vp_floor_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI18_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -605,21 +593,19 @@ declare <8 x double> @llvm.vp.floor.v8f64(<8 x double>, <8 x i1>, i32)
 define <8 x double> @vp_floor_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_v8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI20_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
@@ -649,21 +635,19 @@ declare <15 x double> @llvm.vp.floor.v15f64(<15 x double>, <15 x i1>, i32)
 define <15 x double> @vp_floor_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_v15f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <15 x double> @llvm.vp.floor.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
@@ -693,21 +677,19 @@ declare <16 x double> @llvm.vp.floor.v16f64(<16 x double>, <16 x i1>, i32)
 define <16 x double> @vp_floor_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_v16f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
@@ -743,7 +725,6 @@ define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe
 ; CHECK-NEXT:    slli a1, a1, 4
 ; CHECK-NEXT:    sub sp, sp, a1
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT:    vmv1r.v v25, v0
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    add a1, sp, a1
@@ -760,13 +741,11 @@ define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe
 ; CHECK-NEXT:    lui a2, %hi(.LCPI26_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a2)
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a1, 2
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a1
 ; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll
index edb33158e32eba..ed60aeca9f4d70 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll
@@ -174,18 +174,16 @@ define <8 x half> @vfmax_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v12, v14, v0
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v14, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v14, v14, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v14, v12, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v14, v14, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v14, v12, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfmax.vv v10, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v10, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -214,11 +212,11 @@ define <8 x half> @vfmax_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v14, v10, v12, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v10, v0
-; ZVFHMIN-NEXT:    vfmax.vv v10, v8, v14
+; ZVFHMIN-NEXT:    vmfeq.vv v2, v12, v12
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v2
+; ZVFHMIN-NEXT:    vmerge.vvm v10, v12, v10, v0
+; ZVFHMIN-NEXT:    vfmax.vv v10, v10, v8
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -233,12 +231,10 @@ define <16 x half> @vfmax_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1>
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v12, v0
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v13
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v12
-; ZVFH-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v13
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vfmax.vv v8, v8, v14, v0.t
@@ -250,18 +246,16 @@ define <16 x half> @vfmax_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1>
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v24, v16, v20, v0
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v20, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v20, v20, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v20, v16, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v20, v20, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v20, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vfmax.vv v12, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v12, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -274,11 +268,11 @@ define <16 x half> @vfmax_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFH-NEXT:    vmfeq.vv v12, v10, v10
-; ZVFH-NEXT:    vmerge.vvm v14, v8, v10, v0
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmfeq.vv v2, v10, v10
+; ZVFH-NEXT:    vmerge.vvm v12, v8, v10, v0
+; ZVFH-NEXT:    vmv1r.v v0, v2
 ; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
-; ZVFH-NEXT:    vfmax.vv v8, v8, v14
+; ZVFH-NEXT:    vfmax.vv v8, v8, v12
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfmax_vv_v16f16_unmasked:
@@ -290,11 +284,11 @@ define <16 x half> @vfmax_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16
-; ZVFHMIN-NEXT:    vmerge.vvm v20, v12, v16, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v12, v0
-; ZVFHMIN-NEXT:    vfmax.vv v12, v8, v20
+; ZVFHMIN-NEXT:    vmfeq.vv v4, v16, v16
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v4
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v16, v12, v0
+; ZVFHMIN-NEXT:    vfmax.vv v12, v12, v8
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -377,12 +371,10 @@ define <8 x float> @vfmax_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vfmax.vv v8, v8, v14, v0.t
@@ -396,11 +388,11 @@ define <8 x float> @vfmax_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %vb, i3
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmerge.vvm v12, v8, v10, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT:    vfmax.vv v8, v8, v14
+; CHECK-NEXT:    vfmax.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.vp.maximum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x float> %v
@@ -413,12 +405,10 @@ define <16 x float> @vfmax_vv_v16f32(<16 x float> %va, <16 x float> %vb, <16 x i
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vfmax.vv v8, v8, v20, v0.t
@@ -432,11 +422,11 @@ define <16 x float> @vfmax_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %vb
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmerge.vvm v16, v8, v12, v0
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
-; CHECK-NEXT:    vfmax.vv v8, v8, v20
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.vp.maximum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x float> %v
@@ -483,12 +473,10 @@ define <4 x double> @vfmax_vv_v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1>
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vfmax.vv v8, v8, v14, v0.t
@@ -502,11 +490,11 @@ define <4 x double> @vfmax_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %vb,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmerge.vvm v12, v8, v10, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT:    vfmax.vv v8, v8, v14
+; CHECK-NEXT:    vfmax.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.maximum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x double> %v
@@ -519,12 +507,10 @@ define <8 x double> @vfmax_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vfmax.vv v8, v8, v20, v0.t
@@ -538,11 +524,11 @@ define <8 x double> @vfmax_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %vb,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmerge.vvm v16, v8, v12, v0
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
-; CHECK-NEXT:    vfmax.vv v8, v8, v20
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %v = call <8 x double> @llvm.vp.maximum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x double> %v
@@ -561,14 +547,12 @@ define <16 x double> @vfmax_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    vmv1r.v v7, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
@@ -585,13 +569,25 @@ define <16 x double> @vfmax_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16
 define <16 x double> @vfmax_vv_v16f64_unmasked(<16 x double> %va, <16 x double> %vb, i32 zeroext %evl) {
 ; CHECK-LABEL: vfmax_vv_v16f64_unmasked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v7, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vfmax.vv v8, v8, v24
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.vp.maximum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x double> %v
@@ -628,10 +624,7 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    li a1, 16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    mv a0, a2
 ; CHECK-NEXT:    bltu a2, a1, .LBB24_2
@@ -640,23 +633,28 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
 ; CHECK-NEXT:  .LBB24_2:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    vmfeq.vv v26, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v26
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    vmfeq.vv v26, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v26
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfmax.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vfmax.vv v8, v8, v24, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add a0, sp, a0
@@ -668,13 +666,9 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
 ; CHECK-NEXT:    and a0, a1, a0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    li a1, 24
 ; CHECK-NEXT:    mul a0, a0, a1
@@ -682,15 +676,11 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v16, v8, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfmax.vv v16, v16, v8, v0.t
+; CHECK-NEXT:    vfmax.vv v16, v16, v24, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add a0, sp, a0
@@ -711,21 +701,24 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double>
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    li a3, 24
+; CHECK-NEXT:    li a3, 26
 ; CHECK-NEXT:    mul a1, a1, a3
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb
 ; CHECK-NEXT:    addi a1, a0, 128
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle64.v v24, (a1)
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    slli a3, a1, 4
+; CHECK-NEXT:    add a1, a3, a1
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vle64.v v24, (a0)
 ; CHECK-NEXT:    li a1, 16
-; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    mv a0, a2
 ; CHECK-NEXT:    bltu a2, a1, .LBB25_2
@@ -734,24 +727,30 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double>
 ; CHECK-NEXT:  .LBB25_2:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v7, v24, v24
+; CHECK-NEXT:    vmfeq.vv v16, v24, v24
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v16, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv8r.v v16, v24
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v24, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfmax.vv v8, v8, v24
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
@@ -760,26 +759,32 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double>
 ; CHECK-NEXT:    addi a1, a1, -1
 ; CHECK-NEXT:    and a0, a1, a0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmfeq.vv v0, v16, v16
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vv v7, v8, v8
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v16, v8, v16, v0
 ; CHECK-NEXT:    vfmax.vv v16, v16, v24
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 24
+; CHECK-NEXT:    li a1, 26
 ; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
index 02c2fafc89785f..c92770ded42b7c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
@@ -95,11 +95,11 @@ define <8 x half> @vfmax_v8f16_vv(<8 x half> %a, <8 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v10
-; ZVFHMIN-NEXT:    vmerge.vvm v14, v12, v10, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
-; ZVFHMIN-NEXT:    vfmax.vv v10, v8, v14
+; ZVFHMIN-NEXT:    vmfeq.vv v2, v10, v10
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v10, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v2
+; ZVFHMIN-NEXT:    vmerge.vvm v10, v10, v12, v0
+; ZVFHMIN-NEXT:    vfmax.vv v10, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -114,11 +114,11 @@ define <16 x half> @vfmax_v16f16_vv(<16 x half> %a, <16 x half> %b) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFH-NEXT:    vmfeq.vv v12, v10, v10
-; ZVFH-NEXT:    vmerge.vvm v14, v8, v10, v0
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmfeq.vv v2, v10, v10
+; ZVFH-NEXT:    vmerge.vvm v12, v8, v10, v0
+; ZVFH-NEXT:    vmv1r.v v0, v2
 ; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
-; ZVFH-NEXT:    vfmax.vv v8, v8, v14
+; ZVFH-NEXT:    vfmax.vv v8, v8, v12
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfmax_v16f16_vv:
@@ -128,11 +128,11 @@ define <16 x half> @vfmax_v16f16_vv(<16 x half> %a, <16 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v20, v16, v12, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
-; ZVFHMIN-NEXT:    vfmax.vv v12, v8, v20
+; ZVFHMIN-NEXT:    vmfeq.vv v4, v12, v12
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v12, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v4
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v12, v16, v0
+; ZVFHMIN-NEXT:    vfmax.vv v12, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -181,11 +181,11 @@ define <8 x float> @vfmax_v8f32_vv(<8 x float> %a, <8 x float> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmerge.vvm v12, v8, v10, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT:    vfmax.vv v8, v8, v14
+; CHECK-NEXT:    vfmax.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.maximum.v8f32(<8 x float> %a, <8 x float> %b)
   ret <8 x float> %v
@@ -198,11 +198,11 @@ define <16 x float> @vfmax_v16f32_vv(<16 x float> %a, <16 x float> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmerge.vvm v16, v8, v12, v0
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
-; CHECK-NEXT:    vfmax.vv v8, v8, v20
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %a, <16 x float> %b)
   ret <16 x float> %v
@@ -232,11 +232,11 @@ define <4 x double> @vfmax_v4f64_vv(<4 x double> %a, <4 x double> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmerge.vvm v12, v8, v10, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT:    vfmax.vv v8, v8, v14
+; CHECK-NEXT:    vfmax.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.maximum.v4f64(<4 x double> %a, <4 x double> %b)
   ret <4 x double> %v
@@ -249,11 +249,11 @@ define <8 x double> @vfmax_v8f64_vv(<8 x double> %a, <8 x double> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmerge.vvm v16, v8, v12, v0
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
-; CHECK-NEXT:    vfmax.vv v8, v8, v20
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %v = call <8 x double> @llvm.maximum.v8f64(<8 x double> %a, <8 x double> %b)
   ret <8 x double> %v
@@ -264,13 +264,23 @@ declare <16 x double> @llvm.maximum.v16f64(<16 x double>, <16 x double>)
 define <16 x double> @vfmax_v16f64_vv(<16 x double> %a, <16 x double> %b) nounwind {
 ; CHECK-LABEL: vfmax_v16f64_vv:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    sub sp, sp, a0
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v7, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vfmax.vv v8, v8, v24
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.maximum.v16f64(<16 x double> %a, <16 x double> %b)
   ret <16 x double> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll
index 48649c43f782ad..c201c8842e0829 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll
@@ -174,18 +174,16 @@ define <8 x half> @vfmin_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v12, v14, v0
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v14, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v14, v14, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v14, v12, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v14, v14, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v14, v12, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfmin.vv v10, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v10, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -214,11 +212,11 @@ define <8 x half> @vfmin_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v14, v10, v12, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v10, v0
-; ZVFHMIN-NEXT:    vfmin.vv v10, v8, v14
+; ZVFHMIN-NEXT:    vmfeq.vv v2, v12, v12
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v2
+; ZVFHMIN-NEXT:    vmerge.vvm v10, v12, v10, v0
+; ZVFHMIN-NEXT:    vfmin.vv v10, v10, v8
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -233,12 +231,10 @@ define <16 x half> @vfmin_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1>
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v12, v0
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v13
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v12
-; ZVFH-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v13
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vfmin.vv v8, v8, v14, v0.t
@@ -250,18 +246,16 @@ define <16 x half> @vfmin_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1>
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v24, v16, v20, v0
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v20, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v20, v20, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v20, v16, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v20, v20, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v20, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vfmin.vv v12, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v12, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -274,11 +268,11 @@ define <16 x half> @vfmin_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFH-NEXT:    vmfeq.vv v12, v10, v10
-; ZVFH-NEXT:    vmerge.vvm v14, v8, v10, v0
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmfeq.vv v2, v10, v10
+; ZVFH-NEXT:    vmerge.vvm v12, v8, v10, v0
+; ZVFH-NEXT:    vmv1r.v v0, v2
 ; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
-; ZVFH-NEXT:    vfmin.vv v8, v8, v14
+; ZVFH-NEXT:    vfmin.vv v8, v8, v12
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfmin_vv_v16f16_unmasked:
@@ -290,11 +284,11 @@ define <16 x half> @vfmin_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16
-; ZVFHMIN-NEXT:    vmerge.vvm v20, v12, v16, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v12, v0
-; ZVFHMIN-NEXT:    vfmin.vv v12, v8, v20
+; ZVFHMIN-NEXT:    vmfeq.vv v4, v16, v16
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v4
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v16, v12, v0
+; ZVFHMIN-NEXT:    vfmin.vv v12, v12, v8
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -377,12 +371,10 @@ define <8 x float> @vfmin_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vfmin.vv v8, v8, v14, v0.t
@@ -396,11 +388,11 @@ define <8 x float> @vfmin_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %vb, i3
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmerge.vvm v12, v8, v10, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT:    vfmin.vv v8, v8, v14
+; CHECK-NEXT:    vfmin.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.vp.minimum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x float> %v
@@ -413,12 +405,10 @@ define <16 x float> @vfmin_vv_v16f32(<16 x float> %va, <16 x float> %vb, <16 x i
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vfmin.vv v8, v8, v20, v0.t
@@ -432,11 +422,11 @@ define <16 x float> @vfmin_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %vb
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmerge.vvm v16, v8, v12, v0
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
-; CHECK-NEXT:    vfmin.vv v8, v8, v20
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.vp.minimum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x float> %v
@@ -483,12 +473,10 @@ define <4 x double> @vfmin_vv_v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1>
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vfmin.vv v8, v8, v14, v0.t
@@ -502,11 +490,11 @@ define <4 x double> @vfmin_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %vb,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmerge.vvm v12, v8, v10, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT:    vfmin.vv v8, v8, v14
+; CHECK-NEXT:    vfmin.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.minimum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x double> %v
@@ -519,12 +507,10 @@ define <8 x double> @vfmin_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vfmin.vv v8, v8, v20, v0.t
@@ -538,11 +524,11 @@ define <8 x double> @vfmin_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %vb,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmerge.vvm v16, v8, v12, v0
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
-; CHECK-NEXT:    vfmin.vv v8, v8, v20
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %v = call <8 x double> @llvm.vp.minimum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x double> %v
@@ -561,14 +547,12 @@ define <16 x double> @vfmin_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    vmv1r.v v7, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
@@ -585,13 +569,25 @@ define <16 x double> @vfmin_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16
 define <16 x double> @vfmin_vv_v16f64_unmasked(<16 x double> %va, <16 x double> %vb, i32 zeroext %evl) {
 ; CHECK-LABEL: vfmin_vv_v16f64_unmasked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v7, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vfmin.vv v8, v8, v24
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.vp.minimum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x double> %v
@@ -628,10 +624,7 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    li a1, 16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    mv a0, a2
 ; CHECK-NEXT:    bltu a2, a1, .LBB24_2
@@ -640,23 +633,28 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
 ; CHECK-NEXT:  .LBB24_2:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    vmfeq.vv v26, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v26
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    vmfeq.vv v26, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v26
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfmin.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vfmin.vv v8, v8, v24, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add a0, sp, a0
@@ -668,13 +666,9 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
 ; CHECK-NEXT:    and a0, a1, a0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    li a1, 24
 ; CHECK-NEXT:    mul a0, a0, a1
@@ -682,15 +676,11 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v16, v8, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfmin.vv v16, v16, v8, v0.t
+; CHECK-NEXT:    vfmin.vv v16, v16, v24, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add a0, sp, a0
@@ -711,21 +701,24 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double>
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    li a3, 24
+; CHECK-NEXT:    li a3, 26
 ; CHECK-NEXT:    mul a1, a1, a3
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb
 ; CHECK-NEXT:    addi a1, a0, 128
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle64.v v24, (a1)
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    slli a3, a1, 4
+; CHECK-NEXT:    add a1, a3, a1
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vle64.v v24, (a0)
 ; CHECK-NEXT:    li a1, 16
-; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    mv a0, a2
 ; CHECK-NEXT:    bltu a2, a1, .LBB25_2
@@ -734,24 +727,30 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double>
 ; CHECK-NEXT:  .LBB25_2:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v7, v24, v24
+; CHECK-NEXT:    vmfeq.vv v16, v24, v24
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v16, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv8r.v v16, v24
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v24, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfmin.vv v8, v8, v24
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
@@ -760,26 +759,32 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double>
 ; CHECK-NEXT:    addi a1, a1, -1
 ; CHECK-NEXT:    and a0, a1, a0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmfeq.vv v0, v16, v16
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vv v7, v8, v8
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v16, v8, v16, v0
 ; CHECK-NEXT:    vfmin.vv v16, v16, v24
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 24
+; CHECK-NEXT:    li a1, 26
 ; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
index b15d697f0754ed..bed3a456f68ffd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
@@ -95,11 +95,11 @@ define <8 x half> @vfmin_v8f16_vv(<8 x half> %a, <8 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v10
-; ZVFHMIN-NEXT:    vmerge.vvm v14, v12, v10, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
-; ZVFHMIN-NEXT:    vfmin.vv v10, v8, v14
+; ZVFHMIN-NEXT:    vmfeq.vv v2, v10, v10
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v10, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v2
+; ZVFHMIN-NEXT:    vmerge.vvm v10, v10, v12, v0
+; ZVFHMIN-NEXT:    vfmin.vv v10, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -114,11 +114,11 @@ define <16 x half> @vfmin_v16f16_vv(<16 x half> %a, <16 x half> %b) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFH-NEXT:    vmfeq.vv v12, v10, v10
-; ZVFH-NEXT:    vmerge.vvm v14, v8, v10, v0
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmfeq.vv v2, v10, v10
+; ZVFH-NEXT:    vmerge.vvm v12, v8, v10, v0
+; ZVFH-NEXT:    vmv1r.v v0, v2
 ; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
-; ZVFH-NEXT:    vfmin.vv v8, v8, v14
+; ZVFH-NEXT:    vfmin.vv v8, v8, v12
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfmin_v16f16_vv:
@@ -128,11 +128,11 @@ define <16 x half> @vfmin_v16f16_vv(<16 x half> %a, <16 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v20, v16, v12, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
-; ZVFHMIN-NEXT:    vfmin.vv v12, v8, v20
+; ZVFHMIN-NEXT:    vmfeq.vv v4, v12, v12
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v12, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v4
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v12, v16, v0
+; ZVFHMIN-NEXT:    vfmin.vv v12, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -181,11 +181,11 @@ define <8 x float> @vfmin_v8f32_vv(<8 x float> %a, <8 x float> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmerge.vvm v12, v8, v10, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT:    vfmin.vv v8, v8, v14
+; CHECK-NEXT:    vfmin.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.minimum.v8f32(<8 x float> %a, <8 x float> %b)
   ret <8 x float> %v
@@ -198,11 +198,11 @@ define <16 x float> @vfmin_v16f32_vv(<16 x float> %a, <16 x float> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmerge.vvm v16, v8, v12, v0
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
-; CHECK-NEXT:    vfmin.vv v8, v8, v20
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.minimum.v16f32(<16 x float> %a, <16 x float> %b)
   ret <16 x float> %v
@@ -232,11 +232,11 @@ define <4 x double> @vfmin_v4f64_vv(<4 x double> %a, <4 x double> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmerge.vvm v12, v8, v10, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT:    vfmin.vv v8, v8, v14
+; CHECK-NEXT:    vfmin.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.minimum.v4f64(<4 x double> %a, <4 x double> %b)
   ret <4 x double> %v
@@ -249,11 +249,11 @@ define <8 x double> @vfmin_v8f64_vv(<8 x double> %a, <8 x double> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmerge.vvm v16, v8, v12, v0
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
-; CHECK-NEXT:    vfmin.vv v8, v8, v20
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %v = call <8 x double> @llvm.minimum.v8f64(<8 x double> %a, <8 x double> %b)
   ret <8 x double> %v
@@ -264,13 +264,23 @@ declare <16 x double> @llvm.minimum.v16f64(<16 x double>, <16 x double>)
 define <16 x double> @vfmin_v16f64_vv(<16 x double> %a, <16 x double> %b) nounwind {
 ; CHECK-LABEL: vfmin_v16f64_vv:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    sub sp, sp, a0
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v7, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vfmin.vv v8, v8, v24
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.minimum.v16f64(<16 x double> %a, <16 x double> %b)
   ret <16 x double> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
index a566fab1596f60..9a4c8afb22a078 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
@@ -22,8 +22,8 @@ define void @fcmp_oeq_vv_v8f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsm.v v8, (a2)
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v10
+; ZVFHMIN-NEXT:    vsm.v v0, (a2)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = load <8 x half>, ptr %y
@@ -50,8 +50,8 @@ define void @fcmp_oeq_vv_v8f16_nonans(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsm.v v8, (a2)
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v10
+; ZVFHMIN-NEXT:    vsm.v v0, (a2)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = load <8 x half>, ptr %y
@@ -166,8 +166,8 @@ define void @fcmp_olt_vv_v16f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
 ; ZVFH-NEXT:    vle16.v v10, (a1)
-; ZVFH-NEXT:    vmflt.vv v12, v8, v10
-; ZVFH-NEXT:    vsm.v v12, (a2)
+; ZVFH-NEXT:    vmflt.vv v0, v8, v10
+; ZVFH-NEXT:    vsm.v v0, (a2)
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_vv_v16f16:
@@ -178,8 +178,8 @@ define void @fcmp_olt_vv_v16f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12
-; ZVFHMIN-NEXT:    vsm.v v8, (a2)
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vsm.v v0, (a2)
 ; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = load <16 x half>, ptr %y
@@ -194,8 +194,8 @@ define void @fcmp_olt_vv_v16f16_nonans(ptr %x, ptr %y, ptr %z) {
 ; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
 ; ZVFH-NEXT:    vle16.v v10, (a1)
-; ZVFH-NEXT:    vmflt.vv v12, v8, v10
-; ZVFH-NEXT:    vsm.v v12, (a2)
+; ZVFH-NEXT:    vmflt.vv v0, v8, v10
+; ZVFH-NEXT:    vsm.v v0, (a2)
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_vv_v16f16_nonans:
@@ -206,8 +206,8 @@ define void @fcmp_olt_vv_v16f16_nonans(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12
-; ZVFHMIN-NEXT:    vsm.v v8, (a2)
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vsm.v v0, (a2)
 ; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = load <16 x half>, ptr %y
@@ -222,8 +222,8 @@ define void @fcmp_oge_vv_v8f32(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vle32.v v10, (a1)
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vsm.v v12, (a2)
+; CHECK-NEXT:    vmfle.vv v0, v10, v8
+; CHECK-NEXT:    vsm.v v0, (a2)
 ; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %b = load <8 x float>, ptr %y
@@ -238,8 +238,8 @@ define void @fcmp_oge_vv_v8f32_nonans(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vle32.v v10, (a1)
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vsm.v v12, (a2)
+; CHECK-NEXT:    vmfle.vv v0, v10, v8
+; CHECK-NEXT:    vsm.v v0, (a2)
 ; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %b = load <8 x float>, ptr %y
@@ -305,8 +305,8 @@ define void @fcmp_ule_vv_v32f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFH-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
 ; ZVFH-NEXT:    vle16.v v12, (a1)
-; ZVFH-NEXT:    vmflt.vv v16, v12, v8
-; ZVFH-NEXT:    vmnot.m v8, v16
+; ZVFH-NEXT:    vmflt.vv v0, v12, v8
+; ZVFH-NEXT:    vmnot.m v8, v0
 ; ZVFH-NEXT:    vsm.v v8, (a2)
 ; ZVFH-NEXT:    ret
 ;
@@ -319,8 +319,8 @@ define void @fcmp_ule_vv_v32f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v24, v16
-; ZVFHMIN-NEXT:    vmnot.m v8, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v24, v16
+; ZVFHMIN-NEXT:    vmnot.m v8, v0
 ; ZVFHMIN-NEXT:    vsm.v v8, (a2)
 ; ZVFHMIN-NEXT:    ret
   %a = load <32 x half>, ptr %x
@@ -337,8 +337,8 @@ define void @fcmp_ule_vv_v32f16_nonans(ptr %x, ptr %y, ptr %z) {
 ; ZVFH-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
 ; ZVFH-NEXT:    vle16.v v12, (a1)
-; ZVFH-NEXT:    vmfle.vv v16, v8, v12
-; ZVFH-NEXT:    vsm.v v16, (a2)
+; ZVFH-NEXT:    vmfle.vv v0, v8, v12
+; ZVFH-NEXT:    vsm.v v0, (a2)
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_vv_v32f16_nonans:
@@ -350,8 +350,8 @@ define void @fcmp_ule_vv_v32f16_nonans(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v24, v16
-; ZVFHMIN-NEXT:    vsm.v v8, (a2)
+; ZVFHMIN-NEXT:    vmfle.vv v0, v24, v16
+; ZVFHMIN-NEXT:    vsm.v v0, (a2)
 ; ZVFHMIN-NEXT:    ret
   %a = load <32 x half>, ptr %x
   %b = load <32 x half>, ptr %y
@@ -366,8 +366,8 @@ define void @fcmp_uge_vv_v16f32(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vle32.v v12, (a1)
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v8, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
+; CHECK-NEXT:    vmnot.m v8, v0
 ; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <16 x float>, ptr %x
@@ -383,8 +383,8 @@ define void @fcmp_uge_vv_v16f32_nonans(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vle32.v v12, (a1)
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vsm.v v16, (a2)
+; CHECK-NEXT:    vmfle.vv v0, v12, v8
+; CHECK-NEXT:    vsm.v v0, (a2)
 ; CHECK-NEXT:    ret
   %a = load <16 x float>, ptr %x
   %b = load <16 x float>, ptr %y
@@ -399,8 +399,8 @@ define void @fcmp_ult_vv_v8f64(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    vle64.v v12, (a1)
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v8, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8
+; CHECK-NEXT:    vmnot.m v8, v0
 ; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <8 x double>, ptr %x
@@ -416,8 +416,8 @@ define void @fcmp_ult_vv_v8f64_nonans(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    vle64.v v12, (a1)
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vsm.v v16, (a2)
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
+; CHECK-NEXT:    vsm.v v0, (a2)
 ; CHECK-NEXT:    ret
   %a = load <8 x double>, ptr %x
   %b = load <8 x double>, ptr %y
@@ -433,8 +433,8 @@ define void @fcmp_ugt_vv_v64f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFH-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
 ; ZVFH-NEXT:    vle16.v v16, (a1)
-; ZVFH-NEXT:    vmfle.vv v24, v8, v16
-; ZVFH-NEXT:    vmnot.m v8, v24
+; ZVFH-NEXT:    vmfle.vv v0, v8, v16
+; ZVFH-NEXT:    vmnot.m v8, v0
 ; ZVFH-NEXT:    vsm.v v8, (a2)
 ; ZVFH-NEXT:    ret
   %a = load <64 x half>, ptr %x
@@ -451,8 +451,8 @@ define void @fcmp_ugt_vv_v64f16_nonans(ptr %x, ptr %y, ptr %z) {
 ; ZVFH-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
 ; ZVFH-NEXT:    vle16.v v16, (a1)
-; ZVFH-NEXT:    vmflt.vv v24, v16, v8
-; ZVFH-NEXT:    vsm.v v24, (a2)
+; ZVFH-NEXT:    vmflt.vv v0, v16, v8
+; ZVFH-NEXT:    vsm.v v0, (a2)
 ; ZVFH-NEXT:    ret
   %a = load <64 x half>, ptr %x
   %b = load <64 x half>, ptr %y
@@ -468,9 +468,9 @@ define void @fcmp_ueq_vv_v32f32(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vle32.v v16, (a1)
-; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmflt.vv v25, v16, v8
-; CHECK-NEXT:    vmnor.mm v8, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v8, v16
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmnor.mm v8, v8, v0
 ; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <32 x float>, ptr %x
@@ -487,8 +487,8 @@ define void @fcmp_ueq_vv_v32f32_nonans(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vle32.v v16, (a1)
-; CHECK-NEXT:    vmfeq.vv v24, v8, v16
-; CHECK-NEXT:    vsm.v v24, (a2)
+; CHECK-NEXT:    vmfeq.vv v0, v8, v16
+; CHECK-NEXT:    vsm.v v0, (a2)
 ; CHECK-NEXT:    ret
   %a = load <32 x float>, ptr %x
   %b = load <32 x float>, ptr %y
@@ -503,9 +503,9 @@ define void @fcmp_one_vv_v8f64(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    vle64.v v16, (a1)
-; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmflt.vv v25, v16, v8
-; CHECK-NEXT:    vmor.mm v8, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v8, v16
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmor.mm v8, v8, v0
 ; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <16 x double>, ptr %x
@@ -521,8 +521,8 @@ define void @fcmp_one_vv_v8f64_nonans(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    vle64.v v16, (a1)
-; CHECK-NEXT:    vmfne.vv v24, v8, v16
-; CHECK-NEXT:    vsm.v v24, (a2)
+; CHECK-NEXT:    vmfne.vv v0, v8, v16
+; CHECK-NEXT:    vsm.v v0, (a2)
 ; CHECK-NEXT:    ret
   %a = load <16 x double>, ptr %x
   %b = load <16 x double>, ptr %y
@@ -657,8 +657,8 @@ define void @fcmp_oeq_vf_v8f16(ptr %x, half %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v12
-; ZVFHMIN-NEXT:    vsm.v v8, (a1)
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v12
+; ZVFHMIN-NEXT:    vsm.v v0, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = insertelement <8 x half> poison, half %y, i32 0
@@ -690,8 +690,8 @@ define void @fcmp_oeq_vf_v8f16_nonans(ptr %x, half %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v12
-; ZVFHMIN-NEXT:    vsm.v v8, (a1)
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v12
+; ZVFHMIN-NEXT:    vsm.v v0, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = insertelement <8 x half> poison, half %y, i32 0
@@ -806,8 +806,8 @@ define void @fcmp_olt_vf_v16f16(ptr %x, half %y, ptr %z) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmflt.vf v10, v8, fa0
-; ZVFH-NEXT:    vsm.v v10, (a1)
+; ZVFH-NEXT:    vmflt.vf v0, v8, fa0
+; ZVFH-NEXT:    vsm.v v0, (a1)
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_vf_v16f16:
@@ -823,8 +823,8 @@ define void @fcmp_olt_vf_v16f16(ptr %x, half %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v16
-; ZVFHMIN-NEXT:    vsm.v v8, (a1)
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v16
+; ZVFHMIN-NEXT:    vsm.v v0, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = insertelement <16 x half> poison, half %y, i32 0
@@ -839,8 +839,8 @@ define void @fcmp_olt_vf_v16f16_nonans(ptr %x, half %y, ptr %z) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmflt.vf v10, v8, fa0
-; ZVFH-NEXT:    vsm.v v10, (a1)
+; ZVFH-NEXT:    vmflt.vf v0, v8, fa0
+; ZVFH-NEXT:    vsm.v v0, (a1)
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_vf_v16f16_nonans:
@@ -856,8 +856,8 @@ define void @fcmp_olt_vf_v16f16_nonans(ptr %x, half %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v16
-; ZVFHMIN-NEXT:    vsm.v v8, (a1)
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v16
+; ZVFHMIN-NEXT:    vsm.v v0, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = insertelement <16 x half> poison, half %y, i32 0
@@ -872,8 +872,8 @@ define void @fcmp_oge_vf_v8f32(ptr %x, float %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vsm.v v10, (a1)
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vsm.v v0, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %b = insertelement <8 x float> poison, float %y, i32 0
@@ -888,8 +888,8 @@ define void @fcmp_oge_vf_v8f32_nonans(ptr %x, float %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vsm.v v10, (a1)
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vsm.v v0, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %b = insertelement <8 x float> poison, float %y, i32 0
@@ -955,8 +955,8 @@ define void @fcmp_ule_vf_v32f16(ptr %x, half %y, ptr %z) {
 ; ZVFH-NEXT:    li a2, 32
 ; ZVFH-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmfgt.vf v12, v8, fa0
-; ZVFH-NEXT:    vmnot.m v8, v12
+; ZVFH-NEXT:    vmfgt.vf v0, v8, fa0
+; ZVFH-NEXT:    vmnot.m v8, v0
 ; ZVFH-NEXT:    vsm.v v8, (a1)
 ; ZVFH-NEXT:    ret
 ;
@@ -974,8 +974,8 @@ define void @fcmp_ule_vf_v32f16(ptr %x, half %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v24, v16
-; ZVFHMIN-NEXT:    vmnot.m v8, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v24, v16
+; ZVFHMIN-NEXT:    vmnot.m v8, v0
 ; ZVFHMIN-NEXT:    vsm.v v8, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <32 x half>, ptr %x
@@ -992,8 +992,8 @@ define void @fcmp_ule_vf_v32f16_nonans(ptr %x, half %y, ptr %z) {
 ; ZVFH-NEXT:    li a2, 32
 ; ZVFH-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmfle.vf v12, v8, fa0
-; ZVFH-NEXT:    vsm.v v12, (a1)
+; ZVFH-NEXT:    vmfle.vf v0, v8, fa0
+; ZVFH-NEXT:    vsm.v v0, (a1)
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_vf_v32f16_nonans:
@@ -1010,8 +1010,8 @@ define void @fcmp_ule_vf_v32f16_nonans(ptr %x, half %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v24
-; ZVFHMIN-NEXT:    vsm.v v8, (a1)
+; ZVFHMIN-NEXT:    vmfle.vv v0, v16, v24
+; ZVFHMIN-NEXT:    vsm.v v0, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <32 x half>, ptr %x
   %b = insertelement <32 x half> poison, half %y, i32 0
@@ -1026,8 +1026,8 @@ define void @fcmp_uge_vf_v16f32(ptr %x, float %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v8, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v8, v0
 ; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <16 x float>, ptr %x
@@ -1043,8 +1043,8 @@ define void @fcmp_uge_vf_v16f32_nonans(ptr %x, float %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vsm.v v12, (a1)
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vsm.v v0, (a1)
 ; CHECK-NEXT:    ret
   %a = load <16 x float>, ptr %x
   %b = insertelement <16 x float> poison, float %y, i32 0
@@ -1059,8 +1059,8 @@ define void @fcmp_ult_vf_v8f64(ptr %x, double %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v8, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v8, v0
 ; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x double>, ptr %x
@@ -1076,8 +1076,8 @@ define void @fcmp_ult_vf_v8f64_nonans(ptr %x, double %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vsm.v v12, (a1)
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vsm.v v0, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x double>, ptr %x
   %b = insertelement <8 x double> poison, double %y, i32 0
@@ -1093,8 +1093,8 @@ define void @fcmp_ugt_vf_v64f16(ptr %x, half %y, ptr %z) {
 ; ZVFH-NEXT:    li a2, 64
 ; ZVFH-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmfle.vf v16, v8, fa0
-; ZVFH-NEXT:    vmnot.m v8, v16
+; ZVFH-NEXT:    vmfle.vf v0, v8, fa0
+; ZVFH-NEXT:    vmnot.m v8, v0
 ; ZVFH-NEXT:    vsm.v v8, (a1)
 ; ZVFH-NEXT:    ret
   %a = load <64 x half>, ptr %x
@@ -1111,8 +1111,8 @@ define void @fcmp_ugt_vf_v64f16_nonans(ptr %x, half %y, ptr %z) {
 ; ZVFH-NEXT:    li a2, 64
 ; ZVFH-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmfgt.vf v16, v8, fa0
-; ZVFH-NEXT:    vsm.v v16, (a1)
+; ZVFH-NEXT:    vmfgt.vf v0, v8, fa0
+; ZVFH-NEXT:    vsm.v v0, (a1)
 ; ZVFH-NEXT:    ret
   %a = load <64 x half>, ptr %x
   %b = insertelement <64 x half> poison, half %y, i32 0
@@ -1128,9 +1128,9 @@ define void @fcmp_ueq_vf_v32f32(ptr %x, float %y, ptr %z) {
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0
-; CHECK-NEXT:    vmnor.mm v8, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v8, v8, v0
 ; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <32 x float>, ptr %x
@@ -1147,8 +1147,8 @@ define void @fcmp_ueq_vf_v32f32_nonans(ptr %x, float %y, ptr %z) {
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfeq.vf v16, v8, fa0
-; CHECK-NEXT:    vsm.v v16, (a1)
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
+; CHECK-NEXT:    vsm.v v0, (a1)
 ; CHECK-NEXT:    ret
   %a = load <32 x float>, ptr %x
   %b = insertelement <32 x float> poison, float %y, i32 0
@@ -1163,9 +1163,9 @@ define void @fcmp_one_vf_v8f64(ptr %x, double %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0
-; CHECK-NEXT:    vmor.mm v8, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v8, v8, v0
 ; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <16 x double>, ptr %x
@@ -1181,8 +1181,8 @@ define void @fcmp_one_vf_v8f64_nonans(ptr %x, double %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmfne.vf v16, v8, fa0
-; CHECK-NEXT:    vsm.v v16, (a1)
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
+; CHECK-NEXT:    vsm.v v0, (a1)
 ; CHECK-NEXT:    ret
   %a = load <16 x double>, ptr %x
   %b = insertelement <16 x double> poison, double %y, i32 0
@@ -1330,8 +1330,8 @@ define void @fcmp_oeq_fv_v8f16(ptr %x, half %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsm.v v8, (a1)
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v10
+; ZVFHMIN-NEXT:    vsm.v v0, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = insertelement <8 x half> poison, half %y, i32 0
@@ -1363,8 +1363,8 @@ define void @fcmp_oeq_fv_v8f16_nonans(ptr %x, half %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsm.v v8, (a1)
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v10
+; ZVFHMIN-NEXT:    vsm.v v0, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = insertelement <8 x half> poison, half %y, i32 0
@@ -1479,8 +1479,8 @@ define void @fcmp_olt_fv_v16f16(ptr %x, half %y, ptr %z) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0
-; ZVFH-NEXT:    vsm.v v10, (a1)
+; ZVFH-NEXT:    vmfgt.vf v0, v8, fa0
+; ZVFH-NEXT:    vsm.v v0, (a1)
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_fv_v16f16:
@@ -1496,8 +1496,8 @@ define void @fcmp_olt_fv_v16f16(ptr %x, half %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12
-; ZVFHMIN-NEXT:    vsm.v v8, (a1)
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vsm.v v0, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = insertelement <16 x half> poison, half %y, i32 0
@@ -1512,8 +1512,8 @@ define void @fcmp_olt_fv_v16f16_nonans(ptr %x, half %y, ptr %z) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0
-; ZVFH-NEXT:    vsm.v v10, (a1)
+; ZVFH-NEXT:    vmfgt.vf v0, v8, fa0
+; ZVFH-NEXT:    vsm.v v0, (a1)
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_fv_v16f16_nonans:
@@ -1529,8 +1529,8 @@ define void @fcmp_olt_fv_v16f16_nonans(ptr %x, half %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12
-; ZVFHMIN-NEXT:    vsm.v v8, (a1)
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vsm.v v0, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = insertelement <16 x half> poison, half %y, i32 0
@@ -1545,8 +1545,8 @@ define void @fcmp_oge_fv_v8f32(ptr %x, float %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vsm.v v10, (a1)
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vsm.v v0, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %b = insertelement <8 x float> poison, float %y, i32 0
@@ -1561,8 +1561,8 @@ define void @fcmp_oge_fv_v8f32_nonans(ptr %x, float %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vsm.v v10, (a1)
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vsm.v v0, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %b = insertelement <8 x float> poison, float %y, i32 0
@@ -1628,8 +1628,8 @@ define void @fcmp_ule_fv_v32f16(ptr %x, half %y, ptr %z) {
 ; ZVFH-NEXT:    li a2, 32
 ; ZVFH-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmflt.vf v12, v8, fa0
-; ZVFH-NEXT:    vmnot.m v8, v12
+; ZVFH-NEXT:    vmflt.vf v0, v8, fa0
+; ZVFH-NEXT:    vmnot.m v8, v0
 ; ZVFH-NEXT:    vsm.v v8, (a1)
 ; ZVFH-NEXT:    ret
 ;
@@ -1647,8 +1647,8 @@ define void @fcmp_ule_fv_v32f16(ptr %x, half %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v24
-; ZVFHMIN-NEXT:    vmnot.m v8, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v24
+; ZVFHMIN-NEXT:    vmnot.m v8, v0
 ; ZVFHMIN-NEXT:    vsm.v v8, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <32 x half>, ptr %x
@@ -1665,8 +1665,8 @@ define void @fcmp_ule_fv_v32f16_nonans(ptr %x, half %y, ptr %z) {
 ; ZVFH-NEXT:    li a2, 32
 ; ZVFH-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmfge.vf v12, v8, fa0
-; ZVFH-NEXT:    vsm.v v12, (a1)
+; ZVFH-NEXT:    vmfge.vf v0, v8, fa0
+; ZVFH-NEXT:    vsm.v v0, (a1)
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_fv_v32f16_nonans:
@@ -1683,8 +1683,8 @@ define void @fcmp_ule_fv_v32f16_nonans(ptr %x, half %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v24, v16
-; ZVFHMIN-NEXT:    vsm.v v8, (a1)
+; ZVFHMIN-NEXT:    vmfle.vv v0, v24, v16
+; ZVFHMIN-NEXT:    vsm.v v0, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <32 x half>, ptr %x
   %b = insertelement <32 x half> poison, half %y, i32 0
@@ -1699,8 +1699,8 @@ define void @fcmp_uge_fv_v16f32(ptr %x, float %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v8, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v8, v0
 ; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <16 x float>, ptr %x
@@ -1716,8 +1716,8 @@ define void @fcmp_uge_fv_v16f32_nonans(ptr %x, float %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vsm.v v12, (a1)
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vsm.v v0, (a1)
 ; CHECK-NEXT:    ret
   %a = load <16 x float>, ptr %x
   %b = insertelement <16 x float> poison, float %y, i32 0
@@ -1732,8 +1732,8 @@ define void @fcmp_ult_fv_v8f64(ptr %x, double %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v8, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v8, v0
 ; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x double>, ptr %x
@@ -1749,8 +1749,8 @@ define void @fcmp_ult_fv_v8f64_nonans(ptr %x, double %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vsm.v v12, (a1)
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vsm.v v0, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x double>, ptr %x
   %b = insertelement <8 x double> poison, double %y, i32 0
@@ -1766,8 +1766,8 @@ define void @fcmp_ugt_fv_v64f16(ptr %x, half %y, ptr %z) {
 ; ZVFH-NEXT:    li a2, 64
 ; ZVFH-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmfge.vf v16, v8, fa0
-; ZVFH-NEXT:    vmnot.m v8, v16
+; ZVFH-NEXT:    vmfge.vf v0, v8, fa0
+; ZVFH-NEXT:    vmnot.m v8, v0
 ; ZVFH-NEXT:    vsm.v v8, (a1)
 ; ZVFH-NEXT:    ret
   %a = load <64 x half>, ptr %x
@@ -1784,8 +1784,8 @@ define void @fcmp_ugt_fv_v64f16_nonans(ptr %x, half %y, ptr %z) {
 ; ZVFH-NEXT:    li a2, 64
 ; ZVFH-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmflt.vf v16, v8, fa0
-; ZVFH-NEXT:    vsm.v v16, (a1)
+; ZVFH-NEXT:    vmflt.vf v0, v8, fa0
+; ZVFH-NEXT:    vsm.v v0, (a1)
 ; ZVFH-NEXT:    ret
   %a = load <64 x half>, ptr %x
   %b = insertelement <64 x half> poison, half %y, i32 0
@@ -1801,9 +1801,9 @@ define void @fcmp_ueq_fv_v32f32(ptr %x, float %y, ptr %z) {
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0
-; CHECK-NEXT:    vmnor.mm v8, v17, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v8, v8, v0
 ; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <32 x float>, ptr %x
@@ -1820,8 +1820,8 @@ define void @fcmp_ueq_fv_v32f32_nonans(ptr %x, float %y, ptr %z) {
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfeq.vf v16, v8, fa0
-; CHECK-NEXT:    vsm.v v16, (a1)
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
+; CHECK-NEXT:    vsm.v v0, (a1)
 ; CHECK-NEXT:    ret
   %a = load <32 x float>, ptr %x
   %b = insertelement <32 x float> poison, float %y, i32 0
@@ -1836,9 +1836,9 @@ define void @fcmp_one_fv_v8f64(ptr %x, double %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0
-; CHECK-NEXT:    vmor.mm v8, v17, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v8, v8, v0
 ; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <16 x double>, ptr %x
@@ -1854,8 +1854,8 @@ define void @fcmp_one_fv_v8f64_nonans(ptr %x, double %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmfne.vf v16, v8, fa0
-; CHECK-NEXT:    vsm.v v16, (a1)
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
+; CHECK-NEXT:    vsm.v v0, (a1)
 ; CHECK-NEXT:    ret
   %a = load <16 x double>, ptr %x
   %b = insertelement <16 x double> poison, double %y, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll
index 602662b184290a..24d7a87dc3ca08 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll
@@ -76,9 +76,8 @@ define <4 x i1> @vfptosi_v4i1_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %
 ; CHECK-LABEL: vfptosi_v4i1_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v10, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i1> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll
index c5bfd41ec9510d..da512e3523f758 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll
@@ -76,9 +76,8 @@ define <4 x i1> @vfptoui_v4i1_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %
 ; CHECK-LABEL: vfptoui_v4i1_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v10, v8, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v10, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x i1> @llvm.vp.fptoui.v4i1.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i1> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll
index 0b08d9401402b8..1a7d440e53816a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll
@@ -52,8 +52,8 @@ define void @setgt_vv_v64i8(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    vle8.v v12, (a1)
-; CHECK-NEXT:    vmslt.vv v16, v12, v8
-; CHECK-NEXT:    vsm.v v16, (a2)
+; CHECK-NEXT:    vmslt.vv v0, v12, v8
+; CHECK-NEXT:    vsm.v v0, (a2)
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = load <64 x i8>, ptr %y
@@ -69,8 +69,8 @@ define void @setlt_vv_v128i8(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    vle8.v v16, (a1)
-; CHECK-NEXT:    vmslt.vv v24, v8, v16
-; CHECK-NEXT:    vsm.v v24, (a2)
+; CHECK-NEXT:    vmslt.vv v0, v8, v16
+; CHECK-NEXT:    vsm.v v0, (a2)
 ; CHECK-NEXT:    ret
   %a = load <128 x i8>, ptr %x
   %b = load <128 x i8>, ptr %y
@@ -118,8 +118,8 @@ define void @setugt_vv_v32i8(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    vle8.v v10, (a1)
-; CHECK-NEXT:    vmsltu.vv v12, v10, v8
-; CHECK-NEXT:    vsm.v v12, (a2)
+; CHECK-NEXT:    vmsltu.vv v0, v10, v8
+; CHECK-NEXT:    vsm.v v0, (a2)
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
@@ -135,8 +135,8 @@ define void @setult_vv_v64i8(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    vle8.v v12, (a1)
-; CHECK-NEXT:    vmsltu.vv v16, v8, v12
-; CHECK-NEXT:    vsm.v v16, (a2)
+; CHECK-NEXT:    vmsltu.vv v0, v8, v12
+; CHECK-NEXT:    vsm.v v0, (a2)
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = load <64 x i8>, ptr %y
@@ -152,8 +152,8 @@ define void @setuge_vv_v128i8(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    vle8.v v16, (a1)
-; CHECK-NEXT:    vmsleu.vv v24, v16, v8
-; CHECK-NEXT:    vsm.v v24, (a2)
+; CHECK-NEXT:    vmsleu.vv v0, v16, v8
+; CHECK-NEXT:    vsm.v v0, (a2)
 ; CHECK-NEXT:    ret
   %a = load <128 x i8>, ptr %x
   %b = load <128 x i8>, ptr %y
@@ -200,8 +200,8 @@ define void @setne_vx_v32i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    li a3, 32
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsne.vx v10, v8, a1
-; CHECK-NEXT:    vsm.v v10, (a2)
+; CHECK-NEXT:    vmsne.vx v0, v8, a1
+; CHECK-NEXT:    vsm.v v0, (a2)
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = insertelement <32 x i8> poison, i8 %y, i32 0
@@ -217,8 +217,8 @@ define void @setgt_vx_v64i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    li a3, 64
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsgt.vx v12, v8, a1
-; CHECK-NEXT:    vsm.v v12, (a2)
+; CHECK-NEXT:    vmsgt.vx v0, v8, a1
+; CHECK-NEXT:    vsm.v v0, (a2)
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = insertelement <64 x i8> poison, i8 %y, i32 0
@@ -234,8 +234,8 @@ define void @setlt_vx_v128i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    li a3, 128
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmslt.vx v16, v8, a1
-; CHECK-NEXT:    vsm.v v16, (a2)
+; CHECK-NEXT:    vmslt.vx v0, v8, a1
+; CHECK-NEXT:    vsm.v v0, (a2)
 ; CHECK-NEXT:    ret
   %a = load <128 x i8>, ptr %x
   %b = insertelement <128 x i8> poison, i8 %y, i32 0
@@ -284,8 +284,8 @@ define void @setugt_vx_v32i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    li a3, 32
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsgtu.vx v10, v8, a1
-; CHECK-NEXT:    vsm.v v10, (a2)
+; CHECK-NEXT:    vmsgtu.vx v0, v8, a1
+; CHECK-NEXT:    vsm.v v0, (a2)
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = insertelement <32 x i8> poison, i8 %y, i32 0
@@ -301,8 +301,8 @@ define void @setult_vx_v64i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    li a3, 64
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsltu.vx v12, v8, a1
-; CHECK-NEXT:    vsm.v v12, (a2)
+; CHECK-NEXT:    vmsltu.vx v0, v8, a1
+; CHECK-NEXT:    vsm.v v0, (a2)
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = insertelement <64 x i8> poison, i8 %y, i32 0
@@ -319,8 +319,8 @@ define void @setuge_vx_v128i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    vmsleu.vv v24, v16, v8
-; CHECK-NEXT:    vsm.v v24, (a2)
+; CHECK-NEXT:    vmsleu.vv v0, v16, v8
+; CHECK-NEXT:    vsm.v v0, (a2)
 ; CHECK-NEXT:    ret
   %a = load <128 x i8>, ptr %x
   %b = insertelement <128 x i8> poison, i8 %y, i32 0
@@ -368,8 +368,8 @@ define void @setne_xv_v32i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    li a3, 32
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsne.vx v10, v8, a1
-; CHECK-NEXT:    vsm.v v10, (a2)
+; CHECK-NEXT:    vmsne.vx v0, v8, a1
+; CHECK-NEXT:    vsm.v v0, (a2)
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = insertelement <32 x i8> poison, i8 %y, i32 0
@@ -385,8 +385,8 @@ define void @setgt_xv_v64i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    li a3, 64
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmslt.vx v12, v8, a1
-; CHECK-NEXT:    vsm.v v12, (a2)
+; CHECK-NEXT:    vmslt.vx v0, v8, a1
+; CHECK-NEXT:    vsm.v v0, (a2)
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = insertelement <64 x i8> poison, i8 %y, i32 0
@@ -402,8 +402,8 @@ define void @setlt_xv_v128i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    li a3, 128
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsgt.vx v16, v8, a1
-; CHECK-NEXT:    vsm.v v16, (a2)
+; CHECK-NEXT:    vmsgt.vx v0, v8, a1
+; CHECK-NEXT:    vsm.v v0, (a2)
 ; CHECK-NEXT:    ret
   %a = load <128 x i8>, ptr %x
   %b = insertelement <128 x i8> poison, i8 %y, i32 0
@@ -452,8 +452,8 @@ define void @setugt_xv_v32i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    li a3, 32
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsltu.vx v10, v8, a1
-; CHECK-NEXT:    vsm.v v10, (a2)
+; CHECK-NEXT:    vmsltu.vx v0, v8, a1
+; CHECK-NEXT:    vsm.v v0, (a2)
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = insertelement <32 x i8> poison, i8 %y, i32 0
@@ -469,8 +469,8 @@ define void @setult_xv_v64i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    li a3, 64
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsgtu.vx v12, v8, a1
-; CHECK-NEXT:    vsm.v v12, (a2)
+; CHECK-NEXT:    vmsgtu.vx v0, v8, a1
+; CHECK-NEXT:    vsm.v v0, (a2)
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = insertelement <64 x i8> poison, i8 %y, i32 0
@@ -486,8 +486,8 @@ define void @setuge_xv_v128i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    li a3, 128
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsleu.vx v16, v8, a1
-; CHECK-NEXT:    vsm.v v16, (a2)
+; CHECK-NEXT:    vmsleu.vx v0, v8, a1
+; CHECK-NEXT:    vsm.v v0, (a2)
 ; CHECK-NEXT:    ret
   %a = load <128 x i8>, ptr %x
   %b = insertelement <128 x i8> poison, i8 %y, i32 0
@@ -534,8 +534,8 @@ define void @setne_vi_v32i8(ptr %x, ptr %z) {
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsne.vi v10, v8, 0
-; CHECK-NEXT:    vsm.v v10, (a1)
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    vsm.v v0, (a1)
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %d = icmp ne <32 x i8> %a, splat (i8 0)
@@ -549,8 +549,8 @@ define void @setgt_vi_v64i8(ptr %x, ptr %z) {
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsgt.vi v12, v8, 0
-; CHECK-NEXT:    vsm.v v12, (a1)
+; CHECK-NEXT:    vmsgt.vi v0, v8, 0
+; CHECK-NEXT:    vsm.v v0, (a1)
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %d = icmp sgt <64 x i8> %a, splat (i8 0)
@@ -564,8 +564,8 @@ define void @setgt_vi_v64i8_nonzero(ptr %x, ptr %z) {
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsgt.vi v12, v8, 5
-; CHECK-NEXT:    vsm.v v12, (a1)
+; CHECK-NEXT:    vmsgt.vi v0, v8, 5
+; CHECK-NEXT:    vsm.v v0, (a1)
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %d = icmp sgt <64 x i8> %a, splat (i8 5)
@@ -579,8 +579,8 @@ define void @setlt_vi_v128i8(ptr %x, ptr %z) {
 ; CHECK-NEXT:    li a2, 128
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsle.vi v16, v8, -1
-; CHECK-NEXT:    vsm.v v16, (a1)
+; CHECK-NEXT:    vmsle.vi v0, v8, -1
+; CHECK-NEXT:    vsm.v v0, (a1)
 ; CHECK-NEXT:    ret
   %a = load <128 x i8>, ptr %x
   %d = icmp slt <128 x i8> %a, splat (i8 0)
@@ -622,8 +622,8 @@ define void @setugt_vi_v32i8(ptr %x, ptr %z) {
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsgtu.vi v10, v8, 5
-; CHECK-NEXT:    vsm.v v10, (a1)
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 5
+; CHECK-NEXT:    vsm.v v0, (a1)
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %d = icmp ugt <32 x i8> %a, splat (i8 5)
@@ -637,8 +637,8 @@ define void @setult_vi_v64i8(ptr %x, ptr %z) {
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsleu.vi v12, v8, 4
-; CHECK-NEXT:    vsm.v v12, (a1)
+; CHECK-NEXT:    vmsleu.vi v0, v8, 4
+; CHECK-NEXT:    vsm.v v0, (a1)
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %d = icmp ult <64 x i8> %a, splat (i8 5)
@@ -652,8 +652,8 @@ define void @setuge_vi_v128i8(ptr %x, ptr %z) {
 ; CHECK-NEXT:    li a2, 128
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsgtu.vi v16, v8, 4
-; CHECK-NEXT:    vsm.v v16, (a1)
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 4
+; CHECK-NEXT:    vsm.v v0, (a1)
 ; CHECK-NEXT:    ret
   %a = load <128 x i8>, ptr %x
   %d = icmp uge <128 x i8> %a, splat (i8 5)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll
index 9161cedd58e3c6..27adc7ab40adfd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll
@@ -20,8 +20,8 @@ define void @vector_interleave_store_v32i1_v16i1(<16 x i1> %a, <16 x i1> %b, ptr
 ; CHECK-NEXT:    li a2, -1
 ; CHECK-NEXT:    vwmaccu.vx v12, a2, v10
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vmsne.vi v8, v12, 0
-; CHECK-NEXT:    vsm.v v8, (a0)
+; CHECK-NEXT:    vmsne.vi v0, v12, 0
+; CHECK-NEXT:    vsm.v v0, (a0)
 ; CHECK-NEXT:    ret
   %res = call <32 x i1> @llvm.experimental.vector.interleave2.v32i1(<16 x i1> %a, <16 x i1> %b)
   store <32 x i1> %res, ptr %p
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
index 4f7b885d998e5b..bb125ba773b896 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
@@ -167,8 +167,8 @@ define void @splat_v32i1(ptr %x, i1 %y) {
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vmsne.vi v10, v8, 0
-; CHECK-NEXT:    vsm.v v10, (a0)
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    vsm.v v0, (a0)
 ; CHECK-NEXT:    ret
   %a = insertelement <32 x i1> poison, i1 %y, i32 0
   %b = shufflevector <32 x i1> %a, <32 x i1> poison, <32 x i32> zeroinitializer
@@ -201,8 +201,8 @@ define void @splat_v64i1(ptr %x, i1 %y) {
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vmsne.vi v12, v8, 0
-; CHECK-NEXT:    vsm.v v12, (a0)
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    vsm.v v0, (a0)
 ; CHECK-NEXT:    ret
   %a = insertelement <64 x i1> poison, i1 %y, i32 0
   %b = shufflevector <64 x i1> %a, <64 x i1> poison, <64 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll
index 79b05334cb1f92..59a0b45646dd46 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll
@@ -365,11 +365,11 @@ define void @masked_load_v32f64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    addi a3, a1, 128
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vle64.v v16, (a1)
-; RV32-NEXT:    vle64.v v24, (a3)
+; RV32-NEXT:    vle64.v v8, (a1)
+; RV32-NEXT:    vle64.v v16, (a3)
 ; RV32-NEXT:    fcvt.d.w fa5, zero
-; RV32-NEXT:    vmfeq.vf v8, v16, fa5
-; RV32-NEXT:    vmfeq.vf v0, v24, fa5
+; RV32-NEXT:    vmfeq.vf v8, v8, fa5
+; RV32-NEXT:    vmfeq.vf v0, v16, fa5
 ; RV32-NEXT:    addi a1, a0, 128
 ; RV32-NEXT:    vle64.v v16, (a1), v0.t
 ; RV32-NEXT:    vmv1r.v v0, v8
@@ -383,11 +383,11 @@ define void @masked_load_v32f64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    addi a3, a1, 128
 ; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT:    vle64.v v16, (a1)
-; RV64-NEXT:    vle64.v v24, (a3)
+; RV64-NEXT:    vle64.v v8, (a1)
+; RV64-NEXT:    vle64.v v16, (a3)
 ; RV64-NEXT:    fmv.d.x fa5, zero
-; RV64-NEXT:    vmfeq.vf v8, v16, fa5
-; RV64-NEXT:    vmfeq.vf v0, v24, fa5
+; RV64-NEXT:    vmfeq.vf v8, v8, fa5
+; RV64-NEXT:    vmfeq.vf v0, v16, fa5
 ; RV64-NEXT:    addi a1, a0, 128
 ; RV64-NEXT:    vle64.v v16, (a1), v0.t
 ; RV64-NEXT:    vmv1r.v v0, v8
@@ -429,11 +429,11 @@ define void @masked_load_v64f32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
 ; CHECK-NEXT:    addi a3, a1, 128
 ; CHECK-NEXT:    li a4, 32
 ; CHECK-NEXT:    vsetvli zero, a4, e32, m8, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a1)
-; CHECK-NEXT:    vle32.v v24, (a3)
+; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vle32.v v16, (a3)
 ; CHECK-NEXT:    fmv.w.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v8, v16, fa5
-; CHECK-NEXT:    vmfeq.vf v0, v24, fa5
+; CHECK-NEXT:    vmfeq.vf v8, v8, fa5
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa5
 ; CHECK-NEXT:    addi a1, a0, 128
 ; CHECK-NEXT:    vle32.v v16, (a1), v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
@@ -456,11 +456,11 @@ define void @masked_load_v128f16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
 ; CHECK-NEXT:    addi a3, a1, 128
 ; CHECK-NEXT:    li a4, 64
 ; CHECK-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a1)
-; CHECK-NEXT:    vle16.v v24, (a3)
+; CHECK-NEXT:    vle16.v v8, (a1)
+; CHECK-NEXT:    vle16.v v16, (a3)
 ; CHECK-NEXT:    fmv.h.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v8, v16, fa5
-; CHECK-NEXT:    vmfeq.vf v0, v24, fa5
+; CHECK-NEXT:    vmfeq.vf v8, v8, fa5
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa5
 ; CHECK-NEXT:    addi a1, a0, 128
 ; CHECK-NEXT:    vle16.v v16, (a1), v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
index 4bbda2152a6f97..b499709711b8cb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
@@ -402,12 +402,12 @@ define void @masked_load_v32i64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
 ; RV32-NEXT:    addi a3, a1, 128
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vle64.v v16, (a3)
-; RV32-NEXT:    vle64.v v0, (a1)
+; RV32-NEXT:    vle64.v v8, (a1)
 ; RV32-NEXT:    li a1, 32
 ; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; RV32-NEXT:    vmv.v.i v24, 0
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vmseq.vv v8, v0, v24
+; RV32-NEXT:    vmseq.vv v8, v8, v24
 ; RV32-NEXT:    vmseq.vv v0, v16, v24
 ; RV32-NEXT:    addi a1, a0, 128
 ; RV32-NEXT:    vle64.v v16, (a1), v0.t
@@ -422,10 +422,10 @@ define void @masked_load_v32i64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    addi a3, a1, 128
 ; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT:    vle64.v v16, (a1)
-; RV64-NEXT:    vle64.v v24, (a3)
-; RV64-NEXT:    vmseq.vi v8, v16, 0
-; RV64-NEXT:    vmseq.vi v0, v24, 0
+; RV64-NEXT:    vle64.v v8, (a1)
+; RV64-NEXT:    vle64.v v16, (a3)
+; RV64-NEXT:    vmseq.vi v8, v8, 0
+; RV64-NEXT:    vmseq.vi v0, v16, 0
 ; RV64-NEXT:    addi a1, a0, 128
 ; RV64-NEXT:    vle64.v v16, (a1), v0.t
 ; RV64-NEXT:    vmv1r.v v0, v8
@@ -484,10 +484,10 @@ define void @masked_load_v64i32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
 ; CHECK-NEXT:    addi a3, a1, 128
 ; CHECK-NEXT:    li a4, 32
 ; CHECK-NEXT:    vsetvli zero, a4, e32, m8, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a1)
-; CHECK-NEXT:    vle32.v v24, (a3)
-; CHECK-NEXT:    vmseq.vi v8, v16, 0
-; CHECK-NEXT:    vmseq.vi v0, v24, 0
+; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vle32.v v16, (a3)
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
+; CHECK-NEXT:    vmseq.vi v0, v16, 0
 ; CHECK-NEXT:    addi a1, a0, 128
 ; CHECK-NEXT:    vle32.v v16, (a1), v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
@@ -528,10 +528,10 @@ define void @masked_load_v256i8(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
 ; CHECK-NEXT:    addi a3, a1, 128
 ; CHECK-NEXT:    li a4, 128
 ; CHECK-NEXT:    vsetvli zero, a4, e8, m8, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a1)
-; CHECK-NEXT:    vle8.v v24, (a3)
-; CHECK-NEXT:    vmseq.vi v8, v16, 0
-; CHECK-NEXT:    vmseq.vi v0, v24, 0
+; CHECK-NEXT:    vle8.v v8, (a1)
+; CHECK-NEXT:    vle8.v v16, (a3)
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
+; CHECK-NEXT:    vmseq.vi v0, v16, 0
 ; CHECK-NEXT:    addi a1, a0, 128
 ; CHECK-NEXT:    vle8.v v16, (a1), v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll
index a1e81ea41c249b..fa259f832bc233 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll
@@ -363,82 +363,38 @@ declare void @llvm.masked.store.v32f32.p0v32f32(<32 x float>, <32 x float>*, i32
 define void @masked_store_v32f64(<32 x double>* %val_ptr, <32 x double>* %a, <32 x double>* %m_ptr) nounwind {
 ; RV32-LABEL: masked_store_v32f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 4
-; RV32-NEXT:    sub sp, sp, a3
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vle64.v v8, (a2)
-; RV32-NEXT:    addi a2, a2, 128
 ; RV32-NEXT:    vle64.v v16, (a2)
-; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    slli a2, a2, 3
-; RV32-NEXT:    add a2, sp, a2
-; RV32-NEXT:    addi a2, a2, 16
-; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a2, a2, 128
+; RV32-NEXT:    vle64.v v8, (a2)
 ; RV32-NEXT:    fcvt.d.w fa5, zero
-; RV32-NEXT:    vmfeq.vf v0, v8, fa5
-; RV32-NEXT:    vle64.v v24, (a0)
+; RV32-NEXT:    vmfeq.vf v0, v16, fa5
+; RV32-NEXT:    vle64.v v16, (a0)
 ; RV32-NEXT:    addi a0, a0, 128
-; RV32-NEXT:    vle64.v v8, (a0)
-; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vmfeq.vf v8, v16, fa5
-; RV32-NEXT:    vse64.v v24, (a1), v0.t
+; RV32-NEXT:    vle64.v v24, (a0)
+; RV32-NEXT:    vmfeq.vf v8, v8, fa5
+; RV32-NEXT:    vse64.v v16, (a1), v0.t
 ; RV32-NEXT:    addi a0, a1, 128
 ; RV32-NEXT:    vmv1r.v v0, v8
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vse64.v v8, (a0), v0.t
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    vse64.v v24, (a0), v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: masked_store_v32f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -16
-; RV64-NEXT:    csrr a3, vlenb
-; RV64-NEXT:    slli a3, a3, 4
-; RV64-NEXT:    sub sp, sp, a3
 ; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT:    vle64.v v8, (a2)
-; RV64-NEXT:    addi a2, a2, 128
 ; RV64-NEXT:    vle64.v v16, (a2)
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 16
-; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    addi a2, a2, 128
+; RV64-NEXT:    vle64.v v8, (a2)
 ; RV64-NEXT:    fmv.d.x fa5, zero
-; RV64-NEXT:    vmfeq.vf v0, v8, fa5
-; RV64-NEXT:    vle64.v v24, (a0)
+; RV64-NEXT:    vmfeq.vf v0, v16, fa5
+; RV64-NEXT:    vle64.v v16, (a0)
 ; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vle64.v v8, (a0)
-; RV64-NEXT:    addi a0, sp, 16
-; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 16
-; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV64-NEXT:    vmfeq.vf v8, v16, fa5
-; RV64-NEXT:    vse64.v v24, (a1), v0.t
+; RV64-NEXT:    vle64.v v24, (a0)
+; RV64-NEXT:    vmfeq.vf v8, v8, fa5
+; RV64-NEXT:    vse64.v v16, (a1), v0.t
 ; RV64-NEXT:    addi a0, a1, 128
 ; RV64-NEXT:    vmv1r.v v0, v8
-; RV64-NEXT:    addi a1, sp, 16
-; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-NEXT:    vse64.v v8, (a0), v0.t
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add sp, sp, a0
-; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    vse64.v v24, (a0), v0.t
 ; RV64-NEXT:    ret
   %m = load <32 x double>, <32 x double>* %m_ptr
   %mask = fcmp oeq <32 x double> %m, zeroinitializer
@@ -470,43 +426,21 @@ declare void @llvm.masked.store.v64f16.p0v64f16(<64 x half>, <64 x half>*, i32,
 define void @masked_store_v64f32(<64 x float>* %val_ptr, <64 x float>* %a, <64 x float>* %m_ptr) nounwind {
 ; CHECK-LABEL: masked_store_v64f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    slli a3, a3, 4
-; CHECK-NEXT:    sub sp, sp, a3
 ; CHECK-NEXT:    li a3, 32
 ; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a2)
-; CHECK-NEXT:    addi a2, a2, 128
 ; CHECK-NEXT:    vle32.v v16, (a2)
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, a2, 128
+; CHECK-NEXT:    vle32.v v8, (a2)
 ; CHECK-NEXT:    fmv.w.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vle32.v v24, (a0)
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa5
+; CHECK-NEXT:    vle32.v v16, (a0)
 ; CHECK-NEXT:    addi a0, a0, 128
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vf v8, v16, fa5
-; CHECK-NEXT:    vse32.v v24, (a1), v0.t
+; CHECK-NEXT:    vle32.v v24, (a0)
+; CHECK-NEXT:    vmfeq.vf v8, v8, fa5
+; CHECK-NEXT:    vse32.v v16, (a1), v0.t
 ; CHECK-NEXT:    addi a0, a1, 128
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT:    vse32.v v8, (a0), v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vse32.v v24, (a0), v0.t
 ; CHECK-NEXT:    ret
   %m = load <64 x float>, <64 x float>* %m_ptr
   %mask = fcmp oeq <64 x float> %m, zeroinitializer
@@ -519,43 +453,21 @@ declare void @llvm.masked.store.v64f16.p0v64f32(<64 x float>, <64 x float>*, i32
 define void @masked_store_v128f16(<128 x half>* %val_ptr, <128 x half>* %a, <128 x half>* %m_ptr) nounwind {
 ; CHECK-LABEL: masked_store_v128f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    slli a3, a3, 4
-; CHECK-NEXT:    sub sp, sp, a3
 ; CHECK-NEXT:    li a3, 64
 ; CHECK-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a2)
-; CHECK-NEXT:    addi a2, a2, 128
 ; CHECK-NEXT:    vle16.v v16, (a2)
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, a2, 128
+; CHECK-NEXT:    vle16.v v8, (a2)
 ; CHECK-NEXT:    fmv.h.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vle16.v v24, (a0)
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa5
+; CHECK-NEXT:    vle16.v v16, (a0)
 ; CHECK-NEXT:    addi a0, a0, 128
-; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vf v8, v16, fa5
-; CHECK-NEXT:    vse16.v v24, (a1), v0.t
+; CHECK-NEXT:    vle16.v v24, (a0)
+; CHECK-NEXT:    vmfeq.vf v8, v8, fa5
+; CHECK-NEXT:    vse16.v v16, (a1), v0.t
 ; CHECK-NEXT:    addi a0, a1, 128
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT:    vse16.v v8, (a0), v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vse16.v v24, (a0), v0.t
 ; CHECK-NEXT:    ret
   %m = load <128 x half>, <128 x half>* %m_ptr
   %mask = fcmp oeq <128 x half> %m, zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll
index b3011d0f01cab1..adcbbd9f4070af 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll
@@ -401,54 +401,30 @@ define void @masked_store_v32i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    addi sp, sp, -16
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 18
-; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    slli a3, a3, 1
 ; RV32-NEXT:    sub sp, sp, a3
 ; RV32-NEXT:    addi a3, a2, 128
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vle64.v v8, (a3)
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a4, a3, 3
-; RV32-NEXT:    add a3, a4, a3
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 16
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vle64.v v0, (a2)
+; RV32-NEXT:    vle64.v v0, (a3)
+; RV32-NEXT:    vle64.v v8, (a2)
 ; RV32-NEXT:    li a2, 32
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; RV32-NEXT:    vmv.v.i v24, 0
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vmseq.vv v8, v0, v24
-; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    slli a2, a2, 3
-; RV32-NEXT:    add a2, sp, a2
-; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vmseq.vv v8, v8, v24
+; RV32-NEXT:    addi a2, sp, 16
 ; RV32-NEXT:    vs1r.v v8, (a2) # Unknown-size Folded Spill
 ; RV32-NEXT:    addi a2, a0, 128
-; RV32-NEXT:    vle64.v v8, (a2)
-; RV32-NEXT:    vle64.v v16, (a0)
-; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a2, a0, 3
-; RV32-NEXT:    add a0, a2, a0
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vmseq.vv v0, v16, v24
+; RV32-NEXT:    vle64.v v16, (a2)
+; RV32-NEXT:    vle64.v v8, (a0)
+; RV32-NEXT:    vmseq.vv v0, v0, v24
 ; RV32-NEXT:    addi a0, a1, 128
-; RV32-NEXT:    vse64.v v8, (a0), v0.t
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vse64.v v16, (a0), v0.t
 ; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vse64.v v8, (a1), v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 18
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 1
 ; RV32-NEXT:    add sp, sp, a0
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
@@ -457,37 +433,27 @@ define void @masked_store_v32i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    addi sp, sp, -16
 ; RV64-NEXT:    csrr a3, vlenb
-; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    slli a3, a3, 3
 ; RV64-NEXT:    sub sp, sp, a3
 ; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-NEXT:    vle64.v v8, (a2)
 ; RV64-NEXT:    addi a2, a2, 128
 ; RV64-NEXT:    vle64.v v16, (a2)
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    addi a2, sp, 16
 ; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT:    vmseq.vi v0, v8, 0
 ; RV64-NEXT:    vle64.v v24, (a0)
 ; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    vle64.v v16, (a0)
 ; RV64-NEXT:    addi a0, sp, 16
-; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 16
-; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV64-NEXT:    vmseq.vi v8, v16, 0
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vmseq.vi v8, v8, 0
 ; RV64-NEXT:    vse64.v v24, (a1), v0.t
 ; RV64-NEXT:    addi a0, a1, 128
 ; RV64-NEXT:    vmv1r.v v0, v8
-; RV64-NEXT:    addi a1, sp, 16
-; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-NEXT:    vse64.v v8, (a0), v0.t
+; RV64-NEXT:    vse64.v v16, (a0), v0.t
 ; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    slli a0, a0, 3
 ; RV64-NEXT:    add sp, sp, a0
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
@@ -540,38 +506,28 @@ define void @masked_store_v64i32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    slli a3, a3, 4
+; CHECK-NEXT:    slli a3, a3, 3
 ; CHECK-NEXT:    sub sp, sp, a3
 ; CHECK-NEXT:    li a3, 32
 ; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a2)
 ; CHECK-NEXT:    addi a2, a2, 128
 ; CHECK-NEXT:    vle32.v v16, (a2)
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    addi a2, sp, 16
 ; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vle32.v v24, (a0)
 ; CHECK-NEXT:    addi a0, a0, 128
-; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vle32.v v16, (a0)
 ; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmseq.vi v8, v16, 0
+; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
 ; CHECK-NEXT:    vse32.v v24, (a1), v0.t
 ; CHECK-NEXT:    addi a0, a1, 128
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT:    vse32.v v8, (a0), v0.t
+; CHECK-NEXT:    vse32.v v16, (a0), v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
@@ -606,38 +562,28 @@ define void @masked_store_v128i16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    slli a3, a3, 4
+; CHECK-NEXT:    slli a3, a3, 3
 ; CHECK-NEXT:    sub sp, sp, a3
 ; CHECK-NEXT:    li a3, 64
 ; CHECK-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a2)
 ; CHECK-NEXT:    addi a2, a2, 128
 ; CHECK-NEXT:    vle16.v v16, (a2)
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    addi a2, sp, 16
 ; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vle16.v v24, (a0)
 ; CHECK-NEXT:    addi a0, a0, 128
-; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v16, (a0)
 ; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmseq.vi v8, v16, 0
+; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
 ; CHECK-NEXT:    vse16.v v24, (a1), v0.t
 ; CHECK-NEXT:    addi a0, a1, 128
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT:    vse16.v v8, (a0), v0.t
+; CHECK-NEXT:    vse16.v v16, (a0), v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
@@ -654,38 +600,28 @@ define void @masked_store_v256i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    slli a3, a3, 4
+; CHECK-NEXT:    slli a3, a3, 3
 ; CHECK-NEXT:    sub sp, sp, a3
 ; CHECK-NEXT:    li a3, 128
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a2)
 ; CHECK-NEXT:    addi a2, a2, 128
 ; CHECK-NEXT:    vle8.v v16, (a2)
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    addi a2, sp, 16
 ; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vle8.v v24, (a0)
 ; CHECK-NEXT:    addi a0, a0, 128
-; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v16, (a0)
 ; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmseq.vi v8, v16, 0
+; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v24, (a1), v0.t
 ; CHECK-NEXT:    addi a0, a1, 128
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT:    vse8.v v8, (a0), v0.t
+; CHECK-NEXT:    vse8.v v16, (a0), v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
index 19f3d3ce19fa4c..efa2a8c2e41b0b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
@@ -135,21 +135,19 @@ declare <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half>, <16 x i1>, i32)
 define <16 x half> @vp_nearbyint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v16f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI6_0)
 ; CHECK-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
   ret <16 x half> %v
@@ -263,21 +261,19 @@ declare <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float>, <8 x i1>, i32)
 define <8 x float> @vp_nearbyint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
   ret <8 x float> %v
@@ -307,21 +303,19 @@ declare <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float>, <16 x i1>, i32)
 define <16 x float> @vp_nearbyint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
   ret <16 x float> %v
@@ -393,21 +387,19 @@ declare <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double>, <4 x i1>, i32)
 define <4 x double> @vp_nearbyint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI18_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -437,21 +429,19 @@ declare <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double>, <8 x i1>, i32)
 define <8 x double> @vp_nearbyint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI20_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
@@ -481,21 +471,19 @@ declare <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double>, <15 x i1>, i32)
 define <15 x double> @vp_nearbyint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v15f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
@@ -525,21 +513,19 @@ declare <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double>, <16 x i1>, i32)
 define <16 x double> @vp_nearbyint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v16f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
@@ -572,15 +558,17 @@ define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 z
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    slli a1, a1, 4
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT:    li a2, 16
-; CHECK-NEXT:    vslidedown.vi v7, v0, 2
+; CHECK-NEXT:    vslidedown.vi v24, v0, 2
 ; CHECK-NEXT:    mv a1, a0
 ; CHECK-NEXT:    bltu a0, a2, .LBB26_2
 ; CHECK-NEXT:  # %bb.1:
@@ -589,40 +577,47 @@ define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 z
 ; CHECK-NEXT:    lui a2, %hi(.LCPI26_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a2)
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    frflags a1
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    fsflags a1
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT:    addi a1, a0, -16
 ; CHECK-NEXT:    sltu a0, a0, a1
 ; CHECK-NEXT:    addi a0, a0, -1
 ; CHECK-NEXT:    and a0, a0, a1
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmv1r.v v8, v24
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfabs.v v16, v24, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v7, v16, fa5, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmflt.vf v8, v16, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    vfcvt.x.f.v v16, v24, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmv.v.v v16, v24
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
index 7dcfb247d37cbe..6d7d53689d0975 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
@@ -1672,8 +1672,8 @@ define float @vreduce_fminimum_v8f32(ptr %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vcpop.m a0, v10
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v0
 ; CHECK-NEXT:    beqz a0, .LBB103_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    lui a0, 523264
@@ -1708,8 +1708,8 @@ define float @vreduce_fminimum_v16f32(ptr %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vcpop.m a0, v12
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v0
 ; CHECK-NEXT:    beqz a0, .LBB105_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    lui a0, 523264
@@ -1745,8 +1745,8 @@ define float @vreduce_fminimum_v32f32(ptr %x) {
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v0
 ; CHECK-NEXT:    beqz a0, .LBB107_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    lui a0, 523264
@@ -1783,25 +1783,36 @@ define float @vreduce_fminimum_v64f32(ptr %x) {
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    add a1, a1, a2
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
 ; CHECK-NEXT:    addi a1, a0, 128
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v16, (a0)
 ; CHECK-NEXT:    vle32.v v24, (a1)
 ; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v7, v24, v24
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
+; CHECK-NEXT:    vmfeq.vv v8, v24, v24
 ; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfmin.vv v8, v8, v16
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v0
 ; CHECK-NEXT:    beqz a0, .LBB109_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    lui a0, 523264
@@ -1812,7 +1823,10 @@ define float @vreduce_fminimum_v64f32(ptr %x) {
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:  .LBB109_3:
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    mv a1, a0
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    add a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
@@ -1846,84 +1860,112 @@ define float @vreduce_fminimum_v128f32(ptr %x) {
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    slli a1, a1, 1
 ; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    add a2, a2, a1
 ; CHECK-NEXT:    slli a1, a1, 1
 ; CHECK-NEXT:    add a1, a1, a2
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    addi a1, a0, 128
 ; CHECK-NEXT:    vle32.v v16, (a1)
 ; CHECK-NEXT:    addi a1, a0, 384
 ; CHECK-NEXT:    vle32.v v8, (a1)
-; CHECK-NEXT:    addi a1, a0, 256
 ; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vle32.v v24, (a0)
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a1, a0, 256
 ; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a3, a2, 4
+; CHECK-NEXT:    add a2, a3, a2
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs8r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vle32.v v24, (a0)
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a2, a0, 3
+; CHECK-NEXT:    add a0, a2, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmfeq.vv v0, v8, v8
 ; CHECK-NEXT:    vle32.v v24, (a1)
 ; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfmin.vv v8, v8, v16
-; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v7, v24, v24
+; CHECK-NEXT:    vmfeq.vv v16, v24, v24
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v16, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmerge.vvm v16, v8, v24, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v24, v24, v8, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfmin.vv v24, v24, v8
 ; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vv v7, v16, v16
+; CHECK-NEXT:    vmfeq.vv v8, v16, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfmin.vv v8, v8, v16
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v0
 ; CHECK-NEXT:    beqz a0, .LBB111_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    lui a0, 523264
@@ -1934,8 +1976,10 @@ define float @vreduce_fminimum_v128f32(ptr %x) {
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:  .LBB111_3:
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a0, a0, 1
 ; CHECK-NEXT:    mv a1, a0
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    add a1, a1, a0
 ; CHECK-NEXT:    slli a0, a0, 1
 ; CHECK-NEXT:    add a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
@@ -2012,8 +2056,8 @@ define double @vreduce_fminimum_v4f64(ptr %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vcpop.m a0, v10
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v0
 ; CHECK-NEXT:    beqz a0, .LBB115_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    lui a0, %hi(.LCPI115_0)
@@ -2048,8 +2092,8 @@ define double @vreduce_fminimum_v8f64(ptr %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vcpop.m a0, v12
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v0
 ; CHECK-NEXT:    beqz a0, .LBB117_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    lui a0, %hi(.LCPI117_0)
@@ -2084,8 +2128,8 @@ define double @vreduce_fminimum_v16f64(ptr %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v0
 ; CHECK-NEXT:    beqz a0, .LBB119_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    lui a0, %hi(.LCPI119_0)
@@ -2121,24 +2165,35 @@ define double @vreduce_fminimum_v32f64(ptr %x) {
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    add a1, a1, a2
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
 ; CHECK-NEXT:    addi a1, a0, 128
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle64.v v16, (a0)
 ; CHECK-NEXT:    vle64.v v24, (a1)
 ; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v7, v24, v24
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
+; CHECK-NEXT:    vmfeq.vv v8, v24, v24
 ; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfmin.vv v8, v8, v16
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v0
 ; CHECK-NEXT:    beqz a0, .LBB121_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    lui a0, %hi(.LCPI121_0)
@@ -2149,7 +2204,10 @@ define double @vreduce_fminimum_v32f64(ptr %x) {
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:  .LBB121_3:
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    mv a1, a0
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    add a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
@@ -2182,83 +2240,111 @@ define double @vreduce_fminimum_v64f64(ptr %x) {
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    slli a1, a1, 1
 ; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    add a2, a2, a1
 ; CHECK-NEXT:    slli a1, a1, 1
 ; CHECK-NEXT:    add a1, a1, a2
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    addi a1, a0, 128
 ; CHECK-NEXT:    vle64.v v16, (a1)
 ; CHECK-NEXT:    addi a1, a0, 384
 ; CHECK-NEXT:    vle64.v v8, (a1)
-; CHECK-NEXT:    addi a1, a0, 256
 ; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vle64.v v24, (a0)
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a1, a0, 256
 ; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a3, a2, 4
+; CHECK-NEXT:    add a2, a3, a2
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs8r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vle64.v v24, (a0)
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a2, a0, 3
+; CHECK-NEXT:    add a0, a2, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmfeq.vv v0, v8, v8
 ; CHECK-NEXT:    vle64.v v24, (a1)
 ; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfmin.vv v8, v8, v16
-; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v7, v24, v24
+; CHECK-NEXT:    vmfeq.vv v16, v24, v24
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v16, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmerge.vvm v16, v8, v24, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v24, v24, v8, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfmin.vv v24, v24, v8
 ; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vv v7, v16, v16
+; CHECK-NEXT:    vmfeq.vv v8, v16, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfmin.vv v8, v8, v16
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v0
 ; CHECK-NEXT:    beqz a0, .LBB123_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    lui a0, %hi(.LCPI123_0)
@@ -2269,8 +2355,10 @@ define double @vreduce_fminimum_v64f64(ptr %x) {
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:  .LBB123_3:
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a0, a0, 1
 ; CHECK-NEXT:    mv a1, a0
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    add a1, a1, a0
 ; CHECK-NEXT:    slli a0, a0, 1
 ; CHECK-NEXT:    add a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
@@ -2382,8 +2470,8 @@ define float @vreduce_fmaximum_v8f32(ptr %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vcpop.m a0, v10
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v0
 ; CHECK-NEXT:    beqz a0, .LBB129_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    lui a0, 523264
@@ -2418,8 +2506,8 @@ define float @vreduce_fmaximum_v16f32(ptr %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vcpop.m a0, v12
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v0
 ; CHECK-NEXT:    beqz a0, .LBB131_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    lui a0, 523264
@@ -2455,8 +2543,8 @@ define float @vreduce_fmaximum_v32f32(ptr %x) {
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v0
 ; CHECK-NEXT:    beqz a0, .LBB133_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    lui a0, 523264
@@ -2493,25 +2581,36 @@ define float @vreduce_fmaximum_v64f32(ptr %x) {
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    add a1, a1, a2
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
 ; CHECK-NEXT:    addi a1, a0, 128
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v16, (a0)
 ; CHECK-NEXT:    vle32.v v24, (a1)
 ; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v7, v24, v24
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
+; CHECK-NEXT:    vmfeq.vv v8, v24, v24
 ; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfmax.vv v8, v8, v16
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v0
 ; CHECK-NEXT:    beqz a0, .LBB135_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    lui a0, 523264
@@ -2522,7 +2621,10 @@ define float @vreduce_fmaximum_v64f32(ptr %x) {
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:  .LBB135_3:
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    mv a1, a0
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    add a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
@@ -2556,84 +2658,112 @@ define float @vreduce_fmaximum_v128f32(ptr %x) {
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    slli a1, a1, 1
 ; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    add a2, a2, a1
 ; CHECK-NEXT:    slli a1, a1, 1
 ; CHECK-NEXT:    add a1, a1, a2
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    addi a1, a0, 128
 ; CHECK-NEXT:    vle32.v v16, (a1)
 ; CHECK-NEXT:    addi a1, a0, 384
 ; CHECK-NEXT:    vle32.v v8, (a1)
-; CHECK-NEXT:    addi a1, a0, 256
 ; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vle32.v v24, (a0)
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a1, a0, 256
 ; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a3, a2, 4
+; CHECK-NEXT:    add a2, a3, a2
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs8r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vle32.v v24, (a0)
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a2, a0, 3
+; CHECK-NEXT:    add a0, a2, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmfeq.vv v0, v8, v8
 ; CHECK-NEXT:    vle32.v v24, (a1)
 ; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfmax.vv v8, v8, v16
-; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v7, v24, v24
+; CHECK-NEXT:    vmfeq.vv v16, v24, v24
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v16, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmerge.vvm v16, v8, v24, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v24, v24, v8, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfmax.vv v24, v24, v8
 ; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vv v7, v16, v16
+; CHECK-NEXT:    vmfeq.vv v8, v16, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfmax.vv v8, v8, v16
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v0
 ; CHECK-NEXT:    beqz a0, .LBB137_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    lui a0, 523264
@@ -2644,8 +2774,10 @@ define float @vreduce_fmaximum_v128f32(ptr %x) {
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:  .LBB137_3:
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a0, a0, 1
 ; CHECK-NEXT:    mv a1, a0
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    add a1, a1, a0
 ; CHECK-NEXT:    slli a0, a0, 1
 ; CHECK-NEXT:    add a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
@@ -2722,8 +2854,8 @@ define double @vreduce_fmaximum_v4f64(ptr %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vcpop.m a0, v10
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v0
 ; CHECK-NEXT:    beqz a0, .LBB141_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    lui a0, %hi(.LCPI141_0)
@@ -2758,8 +2890,8 @@ define double @vreduce_fmaximum_v8f64(ptr %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vcpop.m a0, v12
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v0
 ; CHECK-NEXT:    beqz a0, .LBB143_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    lui a0, %hi(.LCPI143_0)
@@ -2794,8 +2926,8 @@ define double @vreduce_fmaximum_v16f64(ptr %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v0
 ; CHECK-NEXT:    beqz a0, .LBB145_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    lui a0, %hi(.LCPI145_0)
@@ -2831,24 +2963,35 @@ define double @vreduce_fmaximum_v32f64(ptr %x) {
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    add a1, a1, a2
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
 ; CHECK-NEXT:    addi a1, a0, 128
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle64.v v16, (a0)
 ; CHECK-NEXT:    vle64.v v24, (a1)
 ; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v7, v24, v24
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
+; CHECK-NEXT:    vmfeq.vv v8, v24, v24
 ; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfmax.vv v8, v8, v16
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v0
 ; CHECK-NEXT:    beqz a0, .LBB147_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    lui a0, %hi(.LCPI147_0)
@@ -2859,7 +3002,10 @@ define double @vreduce_fmaximum_v32f64(ptr %x) {
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:  .LBB147_3:
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    mv a1, a0
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    add a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
@@ -2892,83 +3038,111 @@ define double @vreduce_fmaximum_v64f64(ptr %x) {
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    slli a1, a1, 1
 ; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    add a2, a2, a1
 ; CHECK-NEXT:    slli a1, a1, 1
 ; CHECK-NEXT:    add a1, a1, a2
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    addi a1, a0, 128
 ; CHECK-NEXT:    vle64.v v16, (a1)
 ; CHECK-NEXT:    addi a1, a0, 384
 ; CHECK-NEXT:    vle64.v v8, (a1)
-; CHECK-NEXT:    addi a1, a0, 256
 ; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vle64.v v24, (a0)
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a1, a0, 256
 ; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a3, a2, 4
+; CHECK-NEXT:    add a2, a3, a2
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs8r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vle64.v v24, (a0)
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a2, a0, 3
+; CHECK-NEXT:    add a0, a2, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmfeq.vv v0, v8, v8
 ; CHECK-NEXT:    vle64.v v24, (a1)
 ; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfmax.vv v8, v8, v16
-; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v7, v24, v24
+; CHECK-NEXT:    vmfeq.vv v16, v24, v24
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v16, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmerge.vvm v16, v8, v24, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v24, v24, v8, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfmax.vv v24, v24, v8
 ; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vv v7, v16, v16
+; CHECK-NEXT:    vmfeq.vv v8, v16, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfmax.vv v8, v8, v16
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vcpop.m a0, v0
 ; CHECK-NEXT:    beqz a0, .LBB149_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    lui a0, %hi(.LCPI149_0)
@@ -2979,8 +3153,10 @@ define double @vreduce_fmaximum_v64f64(ptr %x) {
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:  .LBB149_3:
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a0, a0, 1
 ; CHECK-NEXT:    mv a1, a0
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    add a1, a1, a0
 ; CHECK-NEXT:    slli a0, a0, 1
 ; CHECK-NEXT:    add a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
index 2495178ea762d9..bd3e3f88c3ea18 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
@@ -1586,8 +1586,8 @@ define signext i8 @vpreduce_mul_v8i8(i8 signext %s, <8 x i8> %v, <8 x i1> %m, i3
 ; RV32-NEXT:    mv a2, a0
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vid.v v10
-; RV32-NEXT:    vmsltu.vx v9, v10, a1
-; RV32-NEXT:    vmand.mm v0, v9, v0
+; RV32-NEXT:    vmsltu.vx v2, v10, a1
+; RV32-NEXT:    vmand.mm v0, v2, v0
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; RV32-NEXT:    vmv.v.i v9, 1
 ; RV32-NEXT:    vmerge.vvm v8, v9, v8, v0
@@ -1615,8 +1615,8 @@ define signext i8 @vpreduce_mul_v8i8(i8 signext %s, <8 x i8> %v, <8 x i1> %m, i3
 ; RV64-NEXT:    mv a2, a0
 ; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV64-NEXT:    vid.v v10
-; RV64-NEXT:    vmsltu.vx v9, v10, a1
-; RV64-NEXT:    vmand.mm v0, v9, v0
+; RV64-NEXT:    vmsltu.vx v2, v10, a1
+; RV64-NEXT:    vmand.mm v0, v2, v0
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; RV64-NEXT:    vmv.v.i v9, 1
 ; RV64-NEXT:    vmerge.vvm v8, v9, v8, v0
@@ -1650,8 +1650,8 @@ define signext i8 @vpreduce_mul_v16i8(i8 signext %s, <16 x i8> %v, <16 x i1> %m,
 ; RV32-NEXT:    mv a2, a0
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-NEXT:    vid.v v12
-; RV32-NEXT:    vmsltu.vx v9, v12, a1
-; RV32-NEXT:    vmand.mm v0, v9, v0
+; RV32-NEXT:    vmsltu.vx v4, v12, a1
+; RV32-NEXT:    vmand.mm v0, v4, v0
 ; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; RV32-NEXT:    vmv.v.i v9, 1
 ; RV32-NEXT:    vmerge.vvm v8, v9, v8, v0
@@ -1681,8 +1681,8 @@ define signext i8 @vpreduce_mul_v16i8(i8 signext %s, <16 x i8> %v, <16 x i1> %m,
 ; RV64-NEXT:    mv a2, a0
 ; RV64-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; RV64-NEXT:    vid.v v12
-; RV64-NEXT:    vmsltu.vx v9, v12, a1
-; RV64-NEXT:    vmand.mm v0, v9, v0
+; RV64-NEXT:    vmsltu.vx v4, v12, a1
+; RV64-NEXT:    vmand.mm v0, v4, v0
 ; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; RV64-NEXT:    vmv.v.i v9, 1
 ; RV64-NEXT:    vmerge.vvm v8, v9, v8, v0
@@ -1719,8 +1719,8 @@ define signext i8 @vpreduce_mul_v32i8(i8 signext %s, <32 x i8> %v, <32 x i1> %m,
 ; RV32-NEXT:    li a0, 32
 ; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
 ; RV32-NEXT:    vid.v v16
-; RV32-NEXT:    vmsltu.vx v10, v16, a1
-; RV32-NEXT:    vmand.mm v0, v10, v0
+; RV32-NEXT:    vmsltu.vx v16, v16, a1
+; RV32-NEXT:    vmand.mm v0, v16, v0
 ; RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
 ; RV32-NEXT:    vmv.v.i v10, 1
 ; RV32-NEXT:    vmerge.vvm v8, v10, v8, v0
@@ -1753,8 +1753,8 @@ define signext i8 @vpreduce_mul_v32i8(i8 signext %s, <32 x i8> %v, <32 x i1> %m,
 ; RV64-NEXT:    li a0, 32
 ; RV64-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
 ; RV64-NEXT:    vid.v v16
-; RV64-NEXT:    vmsltu.vx v10, v16, a1
-; RV64-NEXT:    vmand.mm v0, v10, v0
+; RV64-NEXT:    vmsltu.vx v16, v16, a1
+; RV64-NEXT:    vmand.mm v0, v16, v0
 ; RV64-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
 ; RV64-NEXT:    vmv.v.i v10, 1
 ; RV64-NEXT:    vmerge.vvm v8, v10, v8, v0
@@ -1796,14 +1796,14 @@ define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m,
 ; RV32-NEXT:    vle8.v v12, (a2)
 ; RV32-NEXT:    mv a2, a0
 ; RV32-NEXT:    vid.v v16
-; RV32-NEXT:    vmsltu.vx v14, v16, a1
-; RV32-NEXT:    vsext.vf4 v16, v12
-; RV32-NEXT:    vmsltu.vx v12, v16, a1
+; RV32-NEXT:    vmsltu.vx v16, v16, a1
+; RV32-NEXT:    vsext.vf4 v24, v12
+; RV32-NEXT:    vmsltu.vx v24, v24, a1
 ; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vslideup.vi v14, v12, 4
+; RV32-NEXT:    vslideup.vi v16, v24, 4
 ; RV32-NEXT:    li a0, 64
 ; RV32-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; RV32-NEXT:    vmand.mm v0, v14, v0
+; RV32-NEXT:    vmand.mm v0, v16, v0
 ; RV32-NEXT:    vmv.v.i v12, 1
 ; RV32-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; RV32-NEXT:    vslidedown.vx v12, v8, a3
@@ -1840,14 +1840,14 @@ define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m,
 ; RV64-NEXT:    vle8.v v12, (a2)
 ; RV64-NEXT:    mv a2, a0
 ; RV64-NEXT:    vid.v v16
-; RV64-NEXT:    vmsltu.vx v14, v16, a1
-; RV64-NEXT:    vsext.vf4 v16, v12
-; RV64-NEXT:    vmsltu.vx v12, v16, a1
+; RV64-NEXT:    vmsltu.vx v16, v16, a1
+; RV64-NEXT:    vsext.vf4 v24, v12
+; RV64-NEXT:    vmsltu.vx v24, v24, a1
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vslideup.vi v14, v12, 4
+; RV64-NEXT:    vslideup.vi v16, v24, 4
 ; RV64-NEXT:    li a0, 64
 ; RV64-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; RV64-NEXT:    vmand.mm v0, v14, v0
+; RV64-NEXT:    vmand.mm v0, v16, v0
 ; RV64-NEXT:    vmv.v.i v12, 1
 ; RV64-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; RV64-NEXT:    vslidedown.vx v12, v8, a3
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
index 920d0d5fe7ba74..b56b459535454a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
@@ -123,19 +123,17 @@ declare <16 x half> @llvm.vp.rint.v16f16(<16 x half>, <16 x i1>, i32)
 define <16 x half> @vp_rint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v16f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI6_0)
 ; CHECK-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x half> @llvm.vp.rint.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
   ret <16 x half> %v
@@ -239,19 +237,17 @@ declare <8 x float> @llvm.vp.rint.v8f32(<8 x float>, <8 x i1>, i32)
 define <8 x float> @vp_rint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.vp.rint.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
   ret <8 x float> %v
@@ -279,19 +275,17 @@ declare <16 x float> @llvm.vp.rint.v16f32(<16 x float>, <16 x i1>, i32)
 define <16 x float> @vp_rint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.vp.rint.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
   ret <16 x float> %v
@@ -357,19 +351,17 @@ declare <4 x double> @llvm.vp.rint.v4f64(<4 x double>, <4 x i1>, i32)
 define <4 x double> @vp_rint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI18_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.rint.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -397,19 +389,17 @@ declare <8 x double> @llvm.vp.rint.v8f64(<8 x double>, <8 x i1>, i32)
 define <8 x double> @vp_rint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI20_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x double> @llvm.vp.rint.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
@@ -437,19 +427,17 @@ declare <15 x double> @llvm.vp.rint.v15f64(<15 x double>, <15 x i1>, i32)
 define <15 x double> @vp_rint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v15f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <15 x double> @llvm.vp.rint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
@@ -477,19 +465,17 @@ declare <16 x double> @llvm.vp.rint.v16f64(<16 x double>, <16 x i1>, i32)
 define <16 x double> @vp_rint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v16f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.vp.rint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
@@ -523,7 +509,6 @@ define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex
 ; CHECK-NEXT:    slli a1, a1, 4
 ; CHECK-NEXT:    sub sp, sp, a1
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT:    vmv1r.v v25, v0
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    add a1, sp, a1
@@ -540,12 +525,10 @@ define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex
 ; CHECK-NEXT:    lui a2, %hi(.LCPI26_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a2)
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
index 6f045349423c7b..cd196c3b903d55 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
@@ -194,23 +194,21 @@ define <8 x half> @vp_round_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
 ;
 ; ZVFHMIN-LABEL: vp_round_v8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v9, v0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v9, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a0, 4
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -261,42 +259,38 @@ declare <16 x half> @llvm.vp.round.v16f16(<16 x half>, <16 x i1>, i32)
 define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_v16f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vmv1r.v v10, v0
 ; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
 ; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    fsrmi a0, 4
-; ZVFH-NEXT:    vmv1r.v v0, v10
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_round_v16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v10, v0
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v10, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a0, 4
-; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -431,21 +425,19 @@ declare <8 x float> @llvm.vp.round.v8f32(<8 x float>, <8 x i1>, i32)
 define <8 x float> @vp_round_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_v8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.vp.round.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
   ret <8 x float> %v
@@ -475,21 +467,19 @@ declare <16 x float> @llvm.vp.round.v16f32(<16 x float>, <16 x i1>, i32)
 define <16 x float> @vp_round_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_v16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.vp.round.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
   ret <16 x float> %v
@@ -561,21 +551,19 @@ declare <4 x double> @llvm.vp.round.v4f64(<4 x double>, <4 x i1>, i32)
 define <4 x double> @vp_round_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI18_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.round.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -605,21 +593,19 @@ declare <8 x double> @llvm.vp.round.v8f64(<8 x double>, <8 x i1>, i32)
 define <8 x double> @vp_round_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_v8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI20_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x double> @llvm.vp.round.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
@@ -649,21 +635,19 @@ declare <15 x double> @llvm.vp.round.v15f64(<15 x double>, <15 x i1>, i32)
 define <15 x double> @vp_round_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_v15f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <15 x double> @llvm.vp.round.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
@@ -693,21 +677,19 @@ declare <16 x double> @llvm.vp.round.v16f64(<16 x double>, <16 x i1>, i32)
 define <16 x double> @vp_round_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_v16f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.vp.round.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
@@ -743,7 +725,6 @@ define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe
 ; CHECK-NEXT:    slli a1, a1, 4
 ; CHECK-NEXT:    sub sp, sp, a1
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT:    vmv1r.v v25, v0
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    add a1, sp, a1
@@ -760,13 +741,11 @@ define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe
 ; CHECK-NEXT:    lui a2, %hi(.LCPI26_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a2)
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a1, 4
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a1
 ; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
index 738d7e37c50bde..6e459a7539b40e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
@@ -194,23 +194,21 @@ define <8 x half> @vp_roundeven_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %
 ;
 ; ZVFHMIN-LABEL: vp_roundeven_v8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v9, v0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v9, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a0, 0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -261,42 +259,38 @@ declare <16 x half> @llvm.vp.roundeven.v16f16(<16 x half>, <16 x i1>, i32)
 define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_v16f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vmv1r.v v10, v0
 ; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
 ; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    fsrmi a0, 0
-; ZVFH-NEXT:    vmv1r.v v0, v10
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundeven_v16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v10, v0
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v10, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a0, 0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -431,21 +425,19 @@ declare <8 x float> @llvm.vp.roundeven.v8f32(<8 x float>, <8 x i1>, i32)
 define <8 x float> @vp_roundeven_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_v8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
   ret <8 x float> %v
@@ -475,21 +467,19 @@ declare <16 x float> @llvm.vp.roundeven.v16f32(<16 x float>, <16 x i1>, i32)
 define <16 x float> @vp_roundeven_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_v16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.vp.roundeven.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
   ret <16 x float> %v
@@ -561,21 +551,19 @@ declare <4 x double> @llvm.vp.roundeven.v4f64(<4 x double>, <4 x i1>, i32)
 define <4 x double> @vp_roundeven_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI18_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -605,21 +593,19 @@ declare <8 x double> @llvm.vp.roundeven.v8f64(<8 x double>, <8 x i1>, i32)
 define <8 x double> @vp_roundeven_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_v8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI20_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
@@ -649,21 +635,19 @@ declare <15 x double> @llvm.vp.roundeven.v15f64(<15 x double>, <15 x i1>, i32)
 define <15 x double> @vp_roundeven_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_v15f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <15 x double> @llvm.vp.roundeven.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
@@ -693,21 +677,19 @@ declare <16 x double> @llvm.vp.roundeven.v16f64(<16 x double>, <16 x i1>, i32)
 define <16 x double> @vp_roundeven_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_v16f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
@@ -743,7 +725,6 @@ define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 z
 ; CHECK-NEXT:    slli a1, a1, 4
 ; CHECK-NEXT:    sub sp, sp, a1
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT:    vmv1r.v v25, v0
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    add a1, sp, a1
@@ -760,13 +741,11 @@ define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 z
 ; CHECK-NEXT:    lui a2, %hi(.LCPI26_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a2)
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a1, 0
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a1
 ; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll
index 6f5b7875266b12..c04f205a78bbc1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll
@@ -194,23 +194,21 @@ define <8 x half> @vp_roundtozero_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext
 ;
 ; ZVFHMIN-LABEL: vp_roundtozero_v8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v9, v0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v9, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a0, 1
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -261,42 +259,38 @@ declare <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half>, <16 x i1>, i32)
 define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_v16f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vmv1r.v v10, v0
 ; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
 ; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    fsrmi a0, 1
-; ZVFH-NEXT:    vmv1r.v v0, v10
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundtozero_v16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v10, v0
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v10, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a0, 1
-; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -431,21 +425,19 @@ declare <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float>, <8 x i1>, i32)
 define <8 x float> @vp_roundtozero_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_v8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
   ret <8 x float> %v
@@ -475,21 +467,19 @@ declare <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float>, <16 x i1>, i32)
 define <16 x float> @vp_roundtozero_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_v16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
   ret <16 x float> %v
@@ -561,21 +551,19 @@ declare <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double>, <4 x i1>, i32)
 define <4 x double> @vp_roundtozero_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI18_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -605,21 +593,19 @@ declare <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double>, <8 x i1>, i32)
 define <8 x double> @vp_roundtozero_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_v8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI20_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
@@ -649,21 +635,19 @@ declare <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double>, <15 x i1>, i32)
 define <15 x double> @vp_roundtozero_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_v15f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
@@ -693,21 +677,19 @@ declare <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double>, <16 x i1>, i32)
 define <16 x double> @vp_roundtozero_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_v16f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
@@ -743,7 +725,6 @@ define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32
 ; CHECK-NEXT:    slli a1, a1, 4
 ; CHECK-NEXT:    sub sp, sp, a1
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT:    vmv1r.v v25, v0
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    add a1, sp, a1
@@ -760,13 +741,11 @@ define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32
 ; CHECK-NEXT:    lui a2, %hi(.LCPI26_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a2)
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a1, 1
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a1
 ; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll
index 4598bf67a23637..4e5e210bc4d67f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll
@@ -26,8 +26,7 @@ define <7 x i1> @fcmp_oeq_vv_v7f16(<7 x half> %va, <7 x half> %vb, <7 x i1> %m,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <7 x i1> @llvm.vp.fcmp.v7f16(<7 x half> %va, <7 x half> %vb, metadata !"oeq", <7 x i1> %m, i32 %evl)
   ret <7 x i1> %v
@@ -48,8 +47,7 @@ define <8 x i1> @fcmp_oeq_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"oeq", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -73,8 +71,7 @@ define <8 x i1> @fcmp_oeq_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -100,8 +97,7 @@ define <8 x i1> @fcmp_oeq_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -122,8 +118,7 @@ define <8 x i1> @fcmp_ogt_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ogt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -147,8 +142,7 @@ define <8 x i1> @fcmp_ogt_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -174,8 +168,7 @@ define <8 x i1> @fcmp_ogt_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v10, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -196,8 +189,7 @@ define <8 x i1> @fcmp_oge_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"oge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -221,8 +213,7 @@ define <8 x i1> @fcmp_oge_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -248,8 +239,7 @@ define <8 x i1> @fcmp_oge_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v10, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -270,8 +260,7 @@ define <8 x i1> @fcmp_olt_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"olt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -295,8 +284,7 @@ define <8 x i1> @fcmp_olt_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v10, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -322,8 +310,7 @@ define <8 x i1> @fcmp_olt_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -344,8 +331,7 @@ define <8 x i1> @fcmp_ole_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ole", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -369,8 +355,7 @@ define <8 x i1> @fcmp_ole_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v10, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -396,8 +381,7 @@ define <8 x i1> @fcmp_ole_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -420,9 +404,9 @@ define <8 x i1> @fcmp_one_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v9, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmflt.vv v2, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vmflt.vv v0, v10, v12, v0.t
+; ZVFHMIN-NEXT:    vmor.mm v0, v0, v2
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"one", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -448,9 +432,9 @@ define <8 x i1> @fcmp_one_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v9, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmflt.vv v2, v10, v12, v0.t
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vmor.mm v0, v0, v2
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -478,9 +462,9 @@ define <8 x i1> @fcmp_one_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v9, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmflt.vv v2, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vmflt.vv v0, v10, v12, v0.t
+; ZVFHMIN-NEXT:    vmor.mm v0, v0, v2
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -502,12 +486,12 @@ define <8 x i1> @fcmp_ord_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m,
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v9, v10, v10, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v2, v10, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v10, v0.t
-; ZVFHMIN-NEXT:    vmand.mm v0, v8, v9
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10, v0.t
+; ZVFHMIN-NEXT:    vmand.mm v0, v0, v2
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ord", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -534,12 +518,12 @@ define <8 x i1> @fcmp_ord_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v10, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v2, v10, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v9, v10, v10, v0.t
-; ZVFHMIN-NEXT:    vmand.mm v0, v8, v9
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10, v0.t
+; ZVFHMIN-NEXT:    vmand.mm v0, v2, v0
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -568,12 +552,12 @@ define <8 x i1> @fcmp_ord_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v10, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v2, v10, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v9, v10, v10, v0.t
-; ZVFHMIN-NEXT:    vmand.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10, v0.t
+; ZVFHMIN-NEXT:    vmand.mm v0, v0, v2
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -596,9 +580,9 @@ define <8 x i1> @fcmp_ueq_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v9, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmnor.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmflt.vv v2, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vmflt.vv v0, v10, v12, v0.t
+; ZVFHMIN-NEXT:    vmnor.mm v0, v0, v2
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ueq", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -624,9 +608,9 @@ define <8 x i1> @fcmp_ueq_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v9, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmnor.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmflt.vv v2, v10, v12, v0.t
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vmnor.mm v0, v0, v2
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -654,9 +638,9 @@ define <8 x i1> @fcmp_ueq_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v9, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmnor.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmflt.vv v2, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vmflt.vv v0, v10, v12, v0.t
+; ZVFHMIN-NEXT:    vmnor.mm v0, v0, v2
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -678,8 +662,8 @@ define <8 x i1> @fcmp_ugt_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ugt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -704,8 +688,8 @@ define <8 x i1> @fcmp_ugt_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v10, v12, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -732,8 +716,8 @@ define <8 x i1> @fcmp_ugt_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -755,8 +739,8 @@ define <8 x i1> @fcmp_uge_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"uge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -781,8 +765,8 @@ define <8 x i1> @fcmp_uge_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v10, v12, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -809,8 +793,8 @@ define <8 x i1> @fcmp_uge_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -832,8 +816,8 @@ define <8 x i1> @fcmp_ult_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ult", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -858,8 +842,8 @@ define <8 x i1> @fcmp_ult_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -886,8 +870,8 @@ define <8 x i1> @fcmp_ult_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v10, v12, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -909,8 +893,8 @@ define <8 x i1> @fcmp_ule_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ule", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -935,8 +919,8 @@ define <8 x i1> @fcmp_ule_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -963,8 +947,8 @@ define <8 x i1> @fcmp_ule_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v10, v12, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -985,8 +969,7 @@ define <8 x i1> @fcmp_une_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfne.vv v0, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"une", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1010,8 +993,7 @@ define <8 x i1> @fcmp_une_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfne.vv v0, v10, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -1037,8 +1019,7 @@ define <8 x i1> @fcmp_une_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfne.vv v0, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -1060,12 +1041,12 @@ define <8 x i1> @fcmp_uno_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m,
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v9, v10, v10, v0.t
+; ZVFHMIN-NEXT:    vmfne.vv v2, v10, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v10, v10, v0.t
-; ZVFHMIN-NEXT:    vmor.mm v0, v8, v9
+; ZVFHMIN-NEXT:    vmfne.vv v0, v10, v10, v0.t
+; ZVFHMIN-NEXT:    vmor.mm v0, v0, v2
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"uno", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1092,12 +1073,12 @@ define <8 x i1> @fcmp_uno_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v10, v10, v0.t
+; ZVFHMIN-NEXT:    vmfne.vv v2, v10, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v9, v10, v10, v0.t
-; ZVFHMIN-NEXT:    vmor.mm v0, v8, v9
+; ZVFHMIN-NEXT:    vmfne.vv v0, v10, v10, v0.t
+; ZVFHMIN-NEXT:    vmor.mm v0, v2, v0
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -1126,12 +1107,12 @@ define <8 x i1> @fcmp_uno_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v10, v10, v0.t
+; ZVFHMIN-NEXT:    vmfne.vv v2, v10, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v9, v10, v10, v0.t
-; ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmfne.vv v0, v10, v10, v0.t
+; ZVFHMIN-NEXT:    vmor.mm v0, v0, v2
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -1147,21 +1128,16 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
 ; ZVFH-NEXT:    addi sp, sp, -16
 ; ZVFH-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFH-NEXT:    csrr a1, vlenb
-; ZVFH-NEXT:    slli a1, a1, 4
+; ZVFH-NEXT:    slli a1, a1, 3
 ; ZVFH-NEXT:    sub sp, sp, a1
-; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFH-NEXT:    addi a1, a0, 128
 ; ZVFH-NEXT:    li a3, 64
 ; ZVFH-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
 ; ZVFH-NEXT:    vle16.v v24, (a1)
-; ZVFH-NEXT:    csrr a1, vlenb
-; ZVFH-NEXT:    slli a1, a1, 3
-; ZVFH-NEXT:    add a1, sp, a1
-; ZVFH-NEXT:    addi a1, a1, 16
+; ZVFH-NEXT:    addi a1, sp, 16
 ; ZVFH-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
 ; ZVFH-NEXT:    vle16.v v24, (a0)
-; ZVFH-NEXT:    addi a0, sp, 16
-; ZVFH-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; ZVFH-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
 ; ZVFH-NEXT:    vslidedown.vi v7, v0, 8
 ; ZVFH-NEXT:    mv a0, a2
@@ -1170,26 +1146,21 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
 ; ZVFH-NEXT:    li a0, 64
 ; ZVFH-NEXT:  .LBB43_2:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    addi a0, sp, 16
-; ZVFH-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; ZVFH-NEXT:    vmfeq.vv v6, v8, v24, v0.t
+; ZVFH-NEXT:    vmfeq.vv v24, v8, v24, v0.t
 ; ZVFH-NEXT:    addi a0, a2, -64
 ; ZVFH-NEXT:    sltu a1, a2, a0
 ; ZVFH-NEXT:    addi a1, a1, -1
 ; ZVFH-NEXT:    and a0, a1, a0
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFH-NEXT:    vmv1r.v v0, v7
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 3
-; ZVFH-NEXT:    add a0, sp, a0
-; ZVFH-NEXT:    addi a0, a0, 16
+; ZVFH-NEXT:    addi a0, sp, 16
 ; ZVFH-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFH-NEXT:    vmfeq.vv v24, v16, v8, v0.t
+; ZVFH-NEXT:    vmfeq.vv v0, v16, v8, v0.t
 ; ZVFH-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; ZVFH-NEXT:    vslideup.vi v6, v24, 8
-; ZVFH-NEXT:    vmv.v.v v0, v6
+; ZVFH-NEXT:    vslideup.vi v24, v0, 8
+; ZVFH-NEXT:    vmv.v.v v0, v24
 ; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 4
+; ZVFH-NEXT:    slli a0, a0, 3
 ; ZVFH-NEXT:    add sp, sp, a0
 ; ZVFH-NEXT:    addi sp, sp, 16
 ; ZVFH-NEXT:    ret
@@ -2299,8 +2270,7 @@ define <7 x i1> @fcmp_oeq_vv_v7f64(<7 x double> %va, <7 x double> %vb, <7 x i1>
 ; CHECK-LABEL: fcmp_oeq_vv_v7f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <7 x i1> @llvm.vp.fcmp.v7f64(<7 x double> %va, <7 x double> %vb, metadata !"oeq", <7 x i1> %m, i32 %evl)
   ret <7 x i1> %v
@@ -2312,8 +2282,7 @@ define <8 x i1> @fcmp_oeq_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_oeq_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"oeq", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2323,8 +2292,7 @@ define <8 x i1> @fcmp_oeq_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-LABEL: fcmp_oeq_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2336,8 +2304,7 @@ define <8 x i1> @fcmp_oeq_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-LABEL: fcmp_oeq_vf_swap_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2349,8 +2316,7 @@ define <8 x i1> @fcmp_ogt_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_ogt_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ogt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2360,8 +2326,7 @@ define <8 x i1> @fcmp_ogt_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-LABEL: fcmp_ogt_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2373,8 +2338,7 @@ define <8 x i1> @fcmp_ogt_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-LABEL: fcmp_ogt_vf_swap_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2386,8 +2350,7 @@ define <8 x i1> @fcmp_oge_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_oge_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"oge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2397,8 +2360,7 @@ define <8 x i1> @fcmp_oge_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-LABEL: fcmp_oge_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2410,8 +2372,7 @@ define <8 x i1> @fcmp_oge_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-LABEL: fcmp_oge_vf_swap_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2423,8 +2384,7 @@ define <8 x i1> @fcmp_olt_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_olt_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"olt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2434,8 +2394,7 @@ define <8 x i1> @fcmp_olt_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-LABEL: fcmp_olt_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2447,8 +2406,7 @@ define <8 x i1> @fcmp_olt_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-LABEL: fcmp_olt_vf_swap_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2460,8 +2418,7 @@ define <8 x i1> @fcmp_ole_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_ole_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ole", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2471,8 +2428,7 @@ define <8 x i1> @fcmp_ole_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-LABEL: fcmp_ole_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2484,8 +2440,7 @@ define <8 x i1> @fcmp_ole_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-LABEL: fcmp_ole_vf_swap_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2497,9 +2452,9 @@ define <8 x i1> @fcmp_one_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_one_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v17, v12, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v4, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"one", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2509,9 +2464,9 @@ define <8 x i1> @fcmp_one_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-LABEL: fcmp_one_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2523,9 +2478,9 @@ define <8 x i1> @fcmp_one_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-LABEL: fcmp_one_vf_swap_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2537,9 +2492,9 @@ define <8 x i1> @fcmp_ord_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_ord_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12, v0.t
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8, v0.t
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12, v0.t
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; CHECK-NEXT:    vmand.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ord", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2551,9 +2506,9 @@ define <8 x i1> @fcmp_ord_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0, v0.t
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8, v0.t
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vf v4, v12, fa0, v0.t
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; CHECK-NEXT:    vmand.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2567,9 +2522,9 @@ define <8 x i1> @fcmp_ord_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0, v0.t
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8, v0.t
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfeq.vf v4, v12, fa0, v0.t
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2581,9 +2536,9 @@ define <8 x i1> @fcmp_ueq_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_ueq_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v17, v12, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v4, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ueq", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2593,9 +2548,9 @@ define <8 x i1> @fcmp_ueq_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-LABEL: fcmp_ueq_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2607,9 +2562,9 @@ define <8 x i1> @fcmp_ueq_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-LABEL: fcmp_ueq_vf_swap_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2621,8 +2576,8 @@ define <8 x i1> @fcmp_ugt_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_ugt_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ugt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2632,8 +2587,8 @@ define <8 x i1> @fcmp_ugt_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-LABEL: fcmp_ugt_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2645,8 +2600,8 @@ define <8 x i1> @fcmp_ugt_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-LABEL: fcmp_ugt_vf_swap_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2658,8 +2613,8 @@ define <8 x i1> @fcmp_uge_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_uge_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2669,8 +2624,8 @@ define <8 x i1> @fcmp_uge_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-LABEL: fcmp_uge_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2682,8 +2637,8 @@ define <8 x i1> @fcmp_uge_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-LABEL: fcmp_uge_vf_swap_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2695,8 +2650,8 @@ define <8 x i1> @fcmp_ult_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_ult_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ult", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2706,8 +2661,8 @@ define <8 x i1> @fcmp_ult_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-LABEL: fcmp_ult_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2719,8 +2674,8 @@ define <8 x i1> @fcmp_ult_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-LABEL: fcmp_ult_vf_swap_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2732,8 +2687,8 @@ define <8 x i1> @fcmp_ule_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_ule_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ule", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2743,8 +2698,8 @@ define <8 x i1> @fcmp_ule_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-LABEL: fcmp_ule_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2756,8 +2711,8 @@ define <8 x i1> @fcmp_ule_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-LABEL: fcmp_ule_vf_swap_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2769,8 +2724,7 @@ define <8 x i1> @fcmp_une_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_une_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfne.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"une", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2780,8 +2734,7 @@ define <8 x i1> @fcmp_une_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-LABEL: fcmp_une_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2793,8 +2746,7 @@ define <8 x i1> @fcmp_une_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-LABEL: fcmp_une_vf_swap_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2806,9 +2758,9 @@ define <8 x i1> @fcmp_uno_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_uno_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v16, v12, v12, v0.t
-; CHECK-NEXT:    vmfne.vv v12, v8, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vv v4, v12, v12, v0.t
+; CHECK-NEXT:    vmfne.vv v0, v8, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uno", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2820,9 +2772,9 @@ define <8 x i1> @fcmp_uno_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0, v0.t
-; CHECK-NEXT:    vmfne.vv v12, v8, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vf v4, v12, fa0, v0.t
+; CHECK-NEXT:    vmfne.vv v0, v8, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2836,9 +2788,9 @@ define <8 x i1> @fcmp_uno_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0, v0.t
-; CHECK-NEXT:    vmfne.vv v12, v8, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v12
+; CHECK-NEXT:    vmfne.vf v4, v12, fa0, v0.t
+; CHECK-NEXT:    vmfne.vv v0, v8, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -2854,23 +2806,18 @@ define <32 x i1> @fcmp_oeq_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    addi a1, a0, 128
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle64.v v24, (a1)
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    addi a1, sp, 16
 ; CHECK-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v7, v0, 2
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle64.v v24, (a0)
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    li a1, 16
 ; CHECK-NEXT:    mv a0, a2
 ; CHECK-NEXT:    bltu a2, a1, .LBB87_2
@@ -2878,26 +2825,21 @@ define <32 x i1> @fcmp_oeq_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x
 ; CHECK-NEXT:    li a0, 16
 ; CHECK-NEXT:  .LBB87_2:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vv v6, v8, v24, v0.t
+; CHECK-NEXT:    vmfeq.vv v8, v8, v24, v0.t
 ; CHECK-NEXT:    addi a0, a2, -16
 ; CHECK-NEXT:    sltu a1, a2, a0
 ; CHECK-NEXT:    addi a1, a1, -1
 ; CHECK-NEXT:    and a0, a1, a0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vv v24, v16, v8, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmfeq.vv v0, v16, v24, v0.t
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vslideup.vi v6, v24, 2
-; CHECK-NEXT:    vmv1r.v v0, v6
+; CHECK-NEXT:    vslideup.vi v8, v0, 2
+; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
index 21bbca00921d6b..e89283d1968eff 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
@@ -595,47 +595,36 @@ define <256 x i1> @icmp_eq_vv_v256i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1>
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    vmv1r.v v7, v0
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    addi a1, sp, 16
 ; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT:    li a1, 128
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
 ; CHECK-NEXT:    vlm.v v0, (a2)
 ; CHECK-NEXT:    addi a2, a0, 128
-; CHECK-NEXT:    vle8.v v8, (a2)
+; CHECK-NEXT:    vle8.v v24, (a2)
 ; CHECK-NEXT:    addi a2, a3, -128
 ; CHECK-NEXT:    sltu a4, a3, a2
 ; CHECK-NEXT:    addi a4, a4, -1
-; CHECK-NEXT:    vle8.v v24, (a0)
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    and a2, a4, a2
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
-; CHECK-NEXT:    vmseq.vv v6, v16, v8, v0.t
+; CHECK-NEXT:    vmseq.vv v16, v16, v24, v0.t
 ; CHECK-NEXT:    bltu a3, a1, .LBB51_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 128
 ; CHECK-NEXT:  .LBB51_2:
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmseq.vv v16, v8, v24, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmv1r.v v8, v6
+; CHECK-NEXT:    vmseq.vv v0, v24, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v16
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
@@ -655,16 +644,15 @@ define <256 x i1> @icmp_eq_vx_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 z
 ; CHECK-NEXT:    addi a4, a4, -1
 ; CHECK-NEXT:    and a1, a4, a1
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v25, v16, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v16, v16, a0, v0.t
 ; CHECK-NEXT:    bltu a2, a3, .LBB52_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a2, 128
 ; CHECK-NEXT:  .LBB52_2:
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmseq.vx v16, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
@@ -684,16 +672,15 @@ define <256 x i1> @icmp_eq_vx_swap_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m,
 ; CHECK-NEXT:    addi a4, a4, -1
 ; CHECK-NEXT:    and a1, a4, a1
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v25, v16, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v16, v16, a0, v0.t
 ; CHECK-NEXT:    bltu a2, a3, .LBB53_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a2, 128
 ; CHECK-NEXT:  .LBB53_2:
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmseq.vx v16, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
@@ -707,8 +694,7 @@ define <8 x i1> @icmp_eq_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32
 ; CHECK-LABEL: icmp_eq_vv_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmseq.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmseq.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"eq", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -718,8 +704,7 @@ define <8 x i1> @icmp_eq_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroex
 ; CHECK-LABEL: icmp_eq_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmseq.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -731,8 +716,7 @@ define <8 x i1> @icmp_eq_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 z
 ; CHECK-LABEL: icmp_eq_vx_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmseq.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -744,8 +728,7 @@ define <8 x i1> @icmp_eq_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_eq_vi_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmseq.vi v10, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmseq.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"eq", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -755,8 +738,7 @@ define <8 x i1> @icmp_eq_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %
 ; CHECK-LABEL: icmp_eq_vi_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmseq.vi v10, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmseq.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"eq", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -766,8 +748,7 @@ define <8 x i1> @icmp_ne_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32
 ; CHECK-LABEL: icmp_ne_vv_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsne.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsne.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"ne", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -777,8 +758,7 @@ define <8 x i1> @icmp_ne_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroex
 ; CHECK-LABEL: icmp_ne_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsne.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsne.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -790,8 +770,7 @@ define <8 x i1> @icmp_ne_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 z
 ; CHECK-LABEL: icmp_ne_vx_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsne.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsne.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -803,8 +782,7 @@ define <8 x i1> @icmp_ne_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_ne_vi_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsne.vi v10, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsne.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"ne", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -814,8 +792,7 @@ define <8 x i1> @icmp_ne_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %
 ; CHECK-LABEL: icmp_ne_vi_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsne.vi v10, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsne.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"ne", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -825,8 +802,7 @@ define <8 x i1> @icmp_ugt_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_ugt_vv_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsltu.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsltu.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"ugt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -836,8 +812,7 @@ define <8 x i1> @icmp_ugt_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe
 ; CHECK-LABEL: icmp_ugt_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsgtu.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsgtu.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -849,8 +824,7 @@ define <8 x i1> @icmp_ugt_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32
 ; CHECK-LABEL: icmp_ugt_vx_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsltu.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsltu.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -862,8 +836,7 @@ define <8 x i1> @icmp_ugt_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_ugt_vi_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v10, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"ugt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -873,8 +846,7 @@ define <8 x i1> @icmp_ugt_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_ugt_vi_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsleu.vi v10, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsleu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"ugt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -884,8 +856,7 @@ define <8 x i1> @icmp_uge_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_uge_vv_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsleu.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsleu.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"uge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -895,10 +866,9 @@ define <8 x i1> @icmp_uge_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe
 ; CHECK-LABEL: icmp_uge_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsleu.vv v10, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsleu.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -910,8 +880,7 @@ define <8 x i1> @icmp_uge_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32
 ; CHECK-LABEL: icmp_uge_vx_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsleu.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsleu.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -923,8 +892,7 @@ define <8 x i1> @icmp_uge_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_uge_vi_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v10, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"uge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -934,8 +902,7 @@ define <8 x i1> @icmp_uge_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_uge_vi_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsleu.vi v10, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsleu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"uge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -945,8 +912,7 @@ define <8 x i1> @icmp_ult_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_ult_vv_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsltu.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsltu.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"ult", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -956,8 +922,7 @@ define <8 x i1> @icmp_ult_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe
 ; CHECK-LABEL: icmp_ult_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsltu.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsltu.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -969,8 +934,7 @@ define <8 x i1> @icmp_ult_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32
 ; CHECK-LABEL: icmp_ult_vx_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsgtu.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsgtu.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -982,8 +946,7 @@ define <8 x i1> @icmp_ult_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_ult_vi_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsleu.vi v10, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsleu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"ult", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -993,8 +956,7 @@ define <8 x i1> @icmp_ult_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_ult_vi_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v10, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"ult", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1004,8 +966,7 @@ define <8 x i1> @icmp_sgt_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_sgt_vv_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmslt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmslt.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"sgt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1015,8 +976,7 @@ define <8 x i1> @icmp_sgt_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe
 ; CHECK-LABEL: icmp_sgt_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsgt.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsgt.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -1028,8 +988,7 @@ define <8 x i1> @icmp_sgt_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32
 ; CHECK-LABEL: icmp_sgt_vx_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmslt.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmslt.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -1041,8 +1000,7 @@ define <8 x i1> @icmp_sgt_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_sgt_vi_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsgt.vi v10, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsgt.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"sgt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1052,8 +1010,7 @@ define <8 x i1> @icmp_sgt_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_sgt_vi_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsle.vi v10, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsle.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"sgt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1063,8 +1020,7 @@ define <8 x i1> @icmp_sge_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_sge_vv_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"sge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1074,10 +1030,9 @@ define <8 x i1> @icmp_sge_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe
 ; CHECK-LABEL: icmp_sge_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsle.vv v10, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsle.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -1089,8 +1044,7 @@ define <8 x i1> @icmp_sge_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32
 ; CHECK-LABEL: icmp_sge_vx_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsle.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsle.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -1102,8 +1056,7 @@ define <8 x i1> @icmp_sge_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_sge_vi_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsgt.vi v10, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsgt.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"sge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1113,8 +1066,7 @@ define <8 x i1> @icmp_sge_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_sge_vi_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsle.vi v10, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsle.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"sge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1124,8 +1076,7 @@ define <8 x i1> @icmp_slt_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_slt_vv_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmslt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmslt.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"slt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1135,8 +1086,7 @@ define <8 x i1> @icmp_slt_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe
 ; CHECK-LABEL: icmp_slt_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmslt.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmslt.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -1148,8 +1098,7 @@ define <8 x i1> @icmp_slt_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32
 ; CHECK-LABEL: icmp_slt_vx_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsgt.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsgt.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -1161,8 +1110,7 @@ define <8 x i1> @icmp_slt_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_slt_vi_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsle.vi v10, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsle.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"slt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1172,8 +1120,7 @@ define <8 x i1> @icmp_slt_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_slt_vi_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsgt.vi v10, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsgt.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"slt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1183,8 +1130,7 @@ define <8 x i1> @icmp_sle_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_sle_vv_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"sle", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1194,8 +1140,7 @@ define <8 x i1> @icmp_sle_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe
 ; CHECK-LABEL: icmp_sle_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsle.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsle.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -1207,10 +1152,9 @@ define <8 x i1> @icmp_sle_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32
 ; CHECK-LABEL: icmp_sle_vx_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsle.vv v10, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsle.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -1222,8 +1166,7 @@ define <8 x i1> @icmp_sle_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_sle_vi_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsle.vi v10, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsle.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"sle", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1233,8 +1176,7 @@ define <8 x i1> @icmp_sle_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_sle_vi_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsgt.vi v10, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsgt.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"sle", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1248,21 +1190,16 @@ define <64 x i1> @icmp_eq_vv_v64i32(<64 x i32> %va, <64 x i32> %vb, <64 x i1> %m
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    addi a1, a0, 128
 ; CHECK-NEXT:    li a3, 32
 ; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v24, (a1)
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    addi a1, sp, 16
 ; CHECK-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vle32.v v24, (a0)
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v7, v0, 4
 ; CHECK-NEXT:    mv a0, a2
@@ -1271,26 +1208,21 @@ define <64 x i1> @icmp_eq_vv_v64i32(<64 x i32> %va, <64 x i32> %vb, <64 x i1> %m
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:  .LBB99_2:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmseq.vv v6, v8, v24, v0.t
+; CHECK-NEXT:    vmseq.vv v24, v8, v24, v0.t
 ; CHECK-NEXT:    addi a0, a2, -32
 ; CHECK-NEXT:    sltu a1, a2, a0
 ; CHECK-NEXT:    addi a1, a1, -1
 ; CHECK-NEXT:    and a0, a1, a0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmseq.vv v24, v16, v8, v0.t
+; CHECK-NEXT:    vmseq.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vi v6, v24, 4
-; CHECK-NEXT:    vmv1r.v v0, v6
+; CHECK-NEXT:    vslideup.vi v24, v0, 4
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
@@ -1310,17 +1242,17 @@ define <64 x i1> @icmp_eq_vx_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i32 ze
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:  .LBB100_2:
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v25, v8, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    addi a2, a1, -32
 ; CHECK-NEXT:    sltu a1, a1, a2
 ; CHECK-NEXT:    addi a1, a1, -1
 ; CHECK-NEXT:    and a1, a1, a2
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmseq.vx v8, v16, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v0, v16, a0, v0.t
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vi v25, v8, 4
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vslideup.vi v8, v0, 4
+; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <64 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <64 x i32> %elt.head, <64 x i32> poison, <64 x i32> zeroinitializer
@@ -1340,17 +1272,17 @@ define <64 x i1> @icmp_eq_vx_swap_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:  .LBB101_2:
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v25, v8, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    addi a2, a1, -32
 ; CHECK-NEXT:    sltu a1, a1, a2
 ; CHECK-NEXT:    addi a1, a1, -1
 ; CHECK-NEXT:    and a1, a1, a2
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmseq.vx v8, v16, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v0, v16, a0, v0.t
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vi v25, v8, 4
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vslideup.vi v8, v0, 4
+; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <64 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <64 x i32> %elt.head, <64 x i32> poison, <64 x i32> zeroinitializer
@@ -1364,8 +1296,7 @@ define <8 x i1> @icmp_eq_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32
 ; CHECK-LABEL: icmp_eq_vv_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmseq.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmseq.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"eq", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1380,18 +1311,16 @@ define <8 x i1> @icmp_eq_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroex
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmseq.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmseq.vv v0, v8, v12, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_eq_vx_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmseq.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1408,18 +1337,16 @@ define <8 x i1> @icmp_eq_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 z
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmseq.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmseq.vv v0, v12, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_eq_vx_swap_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmseq.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1431,8 +1358,7 @@ define <8 x i1> @icmp_eq_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_eq_vi_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmseq.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmseq.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"eq", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1442,8 +1368,7 @@ define <8 x i1> @icmp_eq_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %
 ; CHECK-LABEL: icmp_eq_vi_swap_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmseq.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmseq.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"eq", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1453,8 +1378,7 @@ define <8 x i1> @icmp_ne_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32
 ; CHECK-LABEL: icmp_ne_vv_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsne.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsne.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"ne", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1469,18 +1393,16 @@ define <8 x i1> @icmp_ne_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroex
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsne.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsne.vv v0, v8, v12, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_ne_vx_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsne.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsne.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1497,18 +1419,16 @@ define <8 x i1> @icmp_ne_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 z
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsne.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsne.vv v0, v12, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_ne_vx_swap_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsne.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsne.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1520,8 +1440,7 @@ define <8 x i1> @icmp_ne_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_ne_vi_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsne.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsne.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"ne", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1531,8 +1450,7 @@ define <8 x i1> @icmp_ne_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %
 ; CHECK-LABEL: icmp_ne_vi_swap_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsne.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsne.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"ne", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1542,8 +1460,7 @@ define <8 x i1> @icmp_ugt_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_ugt_vv_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsltu.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"ugt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1558,18 +1475,16 @@ define <8 x i1> @icmp_ugt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsltu.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsltu.vv v0, v12, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_ugt_vx_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsgtu.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsgtu.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1586,18 +1501,16 @@ define <8 x i1> @icmp_ugt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsltu.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsltu.vv v0, v8, v12, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_ugt_vx_swap_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsltu.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsltu.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1609,8 +1522,7 @@ define <8 x i1> @icmp_ugt_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_ugt_vi_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"ugt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1620,8 +1532,7 @@ define <8 x i1> @icmp_ugt_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_ugt_vi_swap_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsleu.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsleu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"ugt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1631,8 +1542,7 @@ define <8 x i1> @icmp_uge_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_uge_vv_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsleu.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsleu.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"uge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1647,20 +1557,18 @@ define <8 x i1> @icmp_uge_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsleu.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsleu.vv v0, v12, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_uge_vx_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT:    vmv.v.x v16, a0
+; RV64-NEXT:    vmv.v.x v12, a0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsleu.vv v12, v16, v8, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsleu.vv v0, v12, v8, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1677,18 +1585,16 @@ define <8 x i1> @icmp_uge_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsleu.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsleu.vv v0, v8, v12, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_uge_vx_swap_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsleu.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsleu.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1700,8 +1606,7 @@ define <8 x i1> @icmp_uge_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_uge_vi_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"uge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1711,8 +1616,7 @@ define <8 x i1> @icmp_uge_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_uge_vi_swap_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsleu.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsleu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"uge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1722,8 +1626,7 @@ define <8 x i1> @icmp_ult_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_ult_vv_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsltu.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"ult", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1738,18 +1641,16 @@ define <8 x i1> @icmp_ult_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsltu.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsltu.vv v0, v8, v12, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_ult_vx_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsltu.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsltu.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1766,18 +1667,16 @@ define <8 x i1> @icmp_ult_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsltu.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsltu.vv v0, v12, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_ult_vx_swap_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsgtu.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsgtu.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1789,8 +1688,7 @@ define <8 x i1> @icmp_ult_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_ult_vi_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsleu.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsleu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"ult", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1800,8 +1698,7 @@ define <8 x i1> @icmp_ult_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_ult_vi_swap_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"ult", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1811,8 +1708,7 @@ define <8 x i1> @icmp_sgt_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_sgt_vv_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmslt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmslt.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"sgt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1827,18 +1723,16 @@ define <8 x i1> @icmp_sgt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmslt.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmslt.vv v0, v12, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_sgt_vx_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsgt.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsgt.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1855,18 +1749,16 @@ define <8 x i1> @icmp_sgt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmslt.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmslt.vv v0, v8, v12, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_sgt_vx_swap_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmslt.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmslt.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1878,8 +1770,7 @@ define <8 x i1> @icmp_sgt_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_sgt_vi_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsgt.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgt.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"sgt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1889,8 +1780,7 @@ define <8 x i1> @icmp_sgt_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_sgt_vi_swap_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsle.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"sgt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1900,8 +1790,7 @@ define <8 x i1> @icmp_sge_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_sge_vv_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsle.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"sge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1916,20 +1805,18 @@ define <8 x i1> @icmp_sge_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsle.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsle.vv v0, v12, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_sge_vx_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT:    vmv.v.x v16, a0
+; RV64-NEXT:    vmv.v.x v12, a0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsle.vv v12, v16, v8, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsle.vv v0, v12, v8, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1946,18 +1833,16 @@ define <8 x i1> @icmp_sge_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsle.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsle.vv v0, v8, v12, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_sge_vx_swap_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsle.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsle.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1969,8 +1854,7 @@ define <8 x i1> @icmp_sge_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_sge_vi_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsgt.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgt.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"sge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1980,8 +1864,7 @@ define <8 x i1> @icmp_sge_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_sge_vi_swap_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsle.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"sge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1991,8 +1874,7 @@ define <8 x i1> @icmp_slt_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_slt_vv_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmslt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmslt.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"slt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2007,18 +1889,16 @@ define <8 x i1> @icmp_slt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmslt.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmslt.vv v0, v8, v12, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_slt_vx_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmslt.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmslt.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -2035,18 +1915,16 @@ define <8 x i1> @icmp_slt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmslt.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmslt.vv v0, v12, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_slt_vx_swap_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsgt.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsgt.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -2058,8 +1936,7 @@ define <8 x i1> @icmp_slt_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_slt_vi_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsle.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"slt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2069,8 +1946,7 @@ define <8 x i1> @icmp_slt_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_slt_vi_swap_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsgt.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgt.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"slt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2080,8 +1956,7 @@ define <8 x i1> @icmp_sle_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_sle_vv_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsle.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"sle", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2096,18 +1971,16 @@ define <8 x i1> @icmp_sle_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsle.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsle.vv v0, v8, v12, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_sle_vx_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsle.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsle.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -2124,20 +1997,18 @@ define <8 x i1> @icmp_sle_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsle.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsle.vv v0, v12, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_sle_vx_swap_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT:    vmv.v.x v16, a0
+; RV64-NEXT:    vmv.v.x v12, a0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsle.vv v12, v16, v8, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsle.vv v0, v12, v8, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -2149,8 +2020,7 @@ define <8 x i1> @icmp_sle_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_sle_vi_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsle.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"sle", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2160,8 +2030,7 @@ define <8 x i1> @icmp_sle_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_sle_vi_swap_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsgt.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgt.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"sle", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
index 4f16ce28bbb7e8..9ce045462bccc3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
@@ -246,12 +246,13 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
 ; CHECK-NEXT:    vslidedown.vi v5, v0, 8
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v26, v0, 4
+; CHECK-NEXT:    addi a3, a1, 128
 ; CHECK-NEXT:    addi a2, a1, 512
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a2)
 ; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    li a3, 48
-; CHECK-NEXT:    mul a2, a2, a3
+; CHECK-NEXT:    li a4, 48
+; CHECK-NEXT:    mul a2, a2, a4
 ; CHECK-NEXT:    add a2, sp, a2
 ; CHECK-NEXT:    addi a2, a2, 16
 ; CHECK-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
@@ -262,39 +263,38 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
 ; CHECK-NEXT:    vle64.v v16, (a2)
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT:    addi a2, a7, -64
-; CHECK-NEXT:    sltu a3, a7, a2
-; CHECK-NEXT:    addi a3, a3, -1
-; CHECK-NEXT:    and a4, a3, a2
+; CHECK-NEXT:    sltu a4, a7, a2
+; CHECK-NEXT:    addi a4, a4, -1
+; CHECK-NEXT:    and a4, a4, a2
 ; CHECK-NEXT:    addi a2, a4, -32
-; CHECK-NEXT:    sltu a3, a4, a2
-; CHECK-NEXT:    addi a3, a3, -1
-; CHECK-NEXT:    and a3, a3, a2
-; CHECK-NEXT:    addi a2, a3, -16
-; CHECK-NEXT:    sltu a5, a3, a2
+; CHECK-NEXT:    sltu a5, a4, a2
 ; CHECK-NEXT:    addi a5, a5, -1
-; CHECK-NEXT:    and a2, a5, a2
+; CHECK-NEXT:    and a5, a5, a2
+; CHECK-NEXT:    addi a2, a5, -16
+; CHECK-NEXT:    sltu a6, a5, a2
+; CHECK-NEXT:    addi a6, a6, -1
+; CHECK-NEXT:    and a2, a6, a2
 ; CHECK-NEXT:    vslidedown.vi v0, v27, 2
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0, v0.t
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    li a5, 24
-; CHECK-NEXT:    mul a2, a2, a5
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    li a2, 16
-; CHECK-NEXT:    addi a5, a1, 128
-; CHECK-NEXT:    bltu a3, a2, .LBB16_2
+; CHECK-NEXT:    vnsrl.wi v8, v16, 0, v0.t
+; CHECK-NEXT:    csrr a6, vlenb
+; CHECK-NEXT:    li t0, 24
+; CHECK-NEXT:    mul a6, a6, t0
+; CHECK-NEXT:    add a6, sp, a6
+; CHECK-NEXT:    addi a6, a6, 16
+; CHECK-NEXT:    vs8r.v v8, (a6) # Unknown-size Folded Spill
+; CHECK-NEXT:    bltu a5, a2, .LBB16_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a3, 16
+; CHECK-NEXT:    li a5, 16
 ; CHECK-NEXT:  .LBB16_2:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v28, v26, 2
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a5)
-; CHECK-NEXT:    addi a5, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a5) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a3)
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT:    vsetvli zero, a5, e32, m4, ta, ma
 ; CHECK-NEXT:    li a3, 64
 ; CHECK-NEXT:    vmv1r.v v0, v27
 ; CHECK-NEXT:    csrr a5, vlenb
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll
index 09b9e7ce4c53db..a2fc11492d1dd0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll
@@ -86,10 +86,9 @@ define <8 x i1> @isnan_v8f32(<8 x float> %x,  <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: isnan_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfclass.v v10, v8, v0.t
+; CHECK-NEXT:    vfclass.v v8, v8, v0.t
 ; CHECK-NEXT:    li a0, 512
-; CHECK-NEXT:    vmseq.vx v8, v10, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.vp.is.fpclass.v8f32(<8 x float> %x, i32 2, <8 x i1> %m, i32 %evl)
   ret <8 x i1> %1
@@ -111,10 +110,9 @@ define <16 x i1> @isnan_v16f32(<16 x float> %x, <16 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: isnan_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfclass.v v12, v8, v0.t
+; CHECK-NEXT:    vfclass.v v8, v8, v0.t
 ; CHECK-NEXT:    li a0, 256
-; CHECK-NEXT:    vmseq.vx v8, v12, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f32(<16 x float> %x, i32 1, <16 x i1> %m, i32 %evl)
   ret <16 x i1> %1
@@ -162,10 +160,9 @@ define <4 x i1> @isposinf_v4f64(<4 x double> %x, <4 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: isposinf_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfclass.v v10, v8, v0.t
+; CHECK-NEXT:    vfclass.v v8, v8, v0.t
 ; CHECK-NEXT:    li a0, 128
-; CHECK-NEXT:    vmseq.vx v8, v10, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.vp.is.fpclass.v4f64(<4 x double> %x, i32 512, <4 x i1> %m, i32 %evl) ; 0x200 = "+inf"
   ret <4 x i1> %1
@@ -187,9 +184,8 @@ define <8 x i1> @isneginf_v8f64(<8 x double> %x, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: isneginf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfclass.v v12, v8, v0.t
-; CHECK-NEXT:    vmseq.vi v8, v12, 1, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vfclass.v v8, v8, v0.t
+; CHECK-NEXT:    vmseq.vi v0, v8, 1, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.vp.is.fpclass.v8f64(<8 x double> %x, i32 4, <8 x i1> %m, i32 %evl) ; "-inf"
   ret <8 x i1> %1
@@ -212,9 +208,8 @@ define <16 x i1> @isfinite_v16f64(<16 x double> %x, <16 x i1> %m, i32 zeroext %e
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vfclass.v v8, v8, v0.t
 ; CHECK-NEXT:    li a0, 126
-; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v16, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 504, <16 x i1> %m, i32 %evl) ; 0x1f8 = "finite"
   ret <16 x i1> %1
@@ -239,9 +234,8 @@ define <16 x i1> @isposfinite_v16f64(<16 x double> %x, <16 x i1> %m, i32 zeroext
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vfclass.v v8, v8, v0.t
 ; CHECK-NEXT:    li a0, 112
-; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v16, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 448, <16 x i1> %m, i32 %evl) ; 0x1c0 = "+finite"
   ret <16 x i1> %1
@@ -265,9 +259,8 @@ define <16 x i1> @isnotfinite_v16f64(<16 x double> %x, <16 x i1> %m, i32 zeroext
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vfclass.v v8, v8, v0.t
 ; CHECK-NEXT:    li a0, 897
-; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v16, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 519, <16 x i1> %m, i32 %evl) ; 0x207 = "inf|nan"
   ret <16 x i1> %1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmp-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmp-constrained-sdnode.ll
index 1a75c50f2b646f..ab83617684ffe8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmp-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmp-constrained-sdnode.ll
@@ -2575,12 +2575,10 @@ define <16 x i1> @fcmp_ogt_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmp_ogt_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2591,12 +2589,10 @@ define <16 x i1> @fcmp_ogt_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2609,12 +2605,10 @@ define <16 x i1> @fcmp_ogt_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2626,12 +2620,10 @@ define <16 x i1> @fcmp_oge_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmp_oge_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2642,12 +2634,10 @@ define <16 x i1> @fcmp_oge_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2660,12 +2650,10 @@ define <16 x i1> @fcmp_oge_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2677,12 +2665,10 @@ define <16 x i1> @fcmp_olt_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmp_olt_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2693,12 +2679,10 @@ define <16 x i1> @fcmp_olt_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2711,12 +2695,10 @@ define <16 x i1> @fcmp_olt_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2728,12 +2710,10 @@ define <16 x i1> @fcmp_ole_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmp_ole_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2744,12 +2724,10 @@ define <16 x i1> @fcmp_ole_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2762,12 +2740,10 @@ define <16 x i1> @fcmp_ole_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2779,14 +2755,13 @@ define <16 x i1> @fcmp_one_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmp_one_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vv v2, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2797,14 +2772,13 @@ define <16 x i1> @fcmp_one_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2817,14 +2791,13 @@ define <16 x i1> @fcmp_one_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2836,9 +2809,9 @@ define <16 x i1> @fcmp_ord_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmp_ord_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2849,9 +2822,9 @@ define <16 x i1> @fcmp_ord_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2864,9 +2837,9 @@ define <16 x i1> @fcmp_ord_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2878,14 +2851,13 @@ define <16 x i1> @fcmp_ueq_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmp_ueq_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vv v2, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2896,14 +2868,13 @@ define <16 x i1> @fcmp_ueq_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2916,14 +2887,13 @@ define <16 x i1> @fcmp_ueq_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2935,12 +2905,11 @@ define <16 x i1> @fcmp_ugt_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmp_ugt_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2951,12 +2920,11 @@ define <16 x i1> @fcmp_ugt_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2969,12 +2937,11 @@ define <16 x i1> @fcmp_ugt_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2986,12 +2953,11 @@ define <16 x i1> @fcmp_uge_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmp_uge_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -3002,12 +2968,11 @@ define <16 x i1> @fcmp_uge_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -3020,12 +2985,11 @@ define <16 x i1> @fcmp_uge_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -3037,12 +3001,11 @@ define <16 x i1> @fcmp_ult_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmp_ult_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -3053,12 +3016,11 @@ define <16 x i1> @fcmp_ult_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -3071,12 +3033,11 @@ define <16 x i1> @fcmp_ult_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -3088,12 +3049,11 @@ define <16 x i1> @fcmp_ule_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmp_ule_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -3104,12 +3064,11 @@ define <16 x i1> @fcmp_ule_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -3122,12 +3081,11 @@ define <16 x i1> @fcmp_ule_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -3173,9 +3131,9 @@ define <16 x i1> @fcmp_uno_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmp_uno_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v12, v10, v10
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vv v0, v10, v10
+; CHECK-NEXT:    vmfne.vv v2, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -3186,9 +3144,9 @@ define <16 x i1> @fcmp_uno_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vf v0, v10, fa0
+; CHECK-NEXT:    vmfne.vv v2, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -3201,9 +3159,9 @@ define <16 x i1> @fcmp_uno_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v10
+; CHECK-NEXT:    vmfne.vf v0, v10, fa0
+; CHECK-NEXT:    vmfne.vv v2, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -3254,12 +3212,10 @@ define <32 x i1> @fcmp_ogt_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3271,12 +3227,10 @@ define <32 x i1> @fcmp_ogt_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3290,12 +3244,10 @@ define <32 x i1> @fcmp_ogt_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3308,12 +3260,10 @@ define <32 x i1> @fcmp_oge_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3325,12 +3275,10 @@ define <32 x i1> @fcmp_oge_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3344,12 +3292,10 @@ define <32 x i1> @fcmp_oge_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3362,12 +3308,10 @@ define <32 x i1> @fcmp_olt_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3379,12 +3323,10 @@ define <32 x i1> @fcmp_olt_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3398,12 +3340,10 @@ define <32 x i1> @fcmp_olt_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3416,12 +3356,10 @@ define <32 x i1> @fcmp_ole_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3433,12 +3371,10 @@ define <32 x i1> @fcmp_ole_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3452,12 +3388,10 @@ define <32 x i1> @fcmp_ole_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3470,14 +3404,13 @@ define <32 x i1> @fcmp_one_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vv v4, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3489,14 +3422,13 @@ define <32 x i1> @fcmp_one_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3510,14 +3442,13 @@ define <32 x i1> @fcmp_one_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3530,9 +3461,9 @@ define <32 x i1> @fcmp_ord_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3544,9 +3475,9 @@ define <32 x i1> @fcmp_ord_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3560,9 +3491,9 @@ define <32 x i1> @fcmp_ord_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3575,14 +3506,13 @@ define <32 x i1> @fcmp_ueq_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vv v4, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3594,14 +3524,13 @@ define <32 x i1> @fcmp_ueq_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3615,14 +3544,13 @@ define <32 x i1> @fcmp_ueq_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3635,12 +3563,11 @@ define <32 x i1> @fcmp_ugt_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3652,12 +3579,11 @@ define <32 x i1> @fcmp_ugt_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3671,12 +3597,11 @@ define <32 x i1> @fcmp_ugt_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3689,12 +3614,11 @@ define <32 x i1> @fcmp_uge_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3706,12 +3630,11 @@ define <32 x i1> @fcmp_uge_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3725,12 +3648,11 @@ define <32 x i1> @fcmp_uge_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3743,12 +3665,11 @@ define <32 x i1> @fcmp_ult_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3760,12 +3681,11 @@ define <32 x i1> @fcmp_ult_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3779,12 +3699,11 @@ define <32 x i1> @fcmp_ult_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3797,12 +3716,11 @@ define <32 x i1> @fcmp_ule_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3814,12 +3732,11 @@ define <32 x i1> @fcmp_ule_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3833,12 +3750,11 @@ define <32 x i1> @fcmp_ule_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3888,9 +3804,9 @@ define <32 x i1> @fcmp_uno_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v16, v12, v12
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vv v0, v12, v12
+; CHECK-NEXT:    vmfne.vv v4, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3902,9 +3818,9 @@ define <32 x i1> @fcmp_uno_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vf v0, v12, fa0
+; CHECK-NEXT:    vmfne.vv v4, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3918,9 +3834,9 @@ define <32 x i1> @fcmp_uno_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v12
+; CHECK-NEXT:    vmfne.vf v0, v12, fa0
+; CHECK-NEXT:    vmfne.vv v4, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -5866,12 +5782,10 @@ define <8 x i1> @fcmp_ogt_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-LABEL: fcmp_ogt_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5882,12 +5796,10 @@ define <8 x i1> @fcmp_ogt_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5900,12 +5812,10 @@ define <8 x i1> @fcmp_ogt_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5917,12 +5827,10 @@ define <8 x i1> @fcmp_oge_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-LABEL: fcmp_oge_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5933,12 +5841,10 @@ define <8 x i1> @fcmp_oge_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5951,12 +5857,10 @@ define <8 x i1> @fcmp_oge_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5968,12 +5872,10 @@ define <8 x i1> @fcmp_olt_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-LABEL: fcmp_olt_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5984,12 +5886,10 @@ define <8 x i1> @fcmp_olt_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6002,12 +5902,10 @@ define <8 x i1> @fcmp_olt_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6019,12 +5917,10 @@ define <8 x i1> @fcmp_ole_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-LABEL: fcmp_ole_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -6035,12 +5931,10 @@ define <8 x i1> @fcmp_ole_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6053,12 +5947,10 @@ define <8 x i1> @fcmp_ole_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6070,14 +5962,13 @@ define <8 x i1> @fcmp_one_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-LABEL: fcmp_one_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vv v2, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -6088,14 +5979,13 @@ define <8 x i1> @fcmp_one_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6108,14 +5998,13 @@ define <8 x i1> @fcmp_one_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6127,9 +6016,9 @@ define <8 x i1> @fcmp_ord_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-LABEL: fcmp_ord_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -6140,9 +6029,9 @@ define <8 x i1> @fcmp_ord_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6155,9 +6044,9 @@ define <8 x i1> @fcmp_ord_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6169,14 +6058,13 @@ define <8 x i1> @fcmp_ueq_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-LABEL: fcmp_ueq_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vv v2, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -6187,14 +6075,13 @@ define <8 x i1> @fcmp_ueq_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6207,14 +6094,13 @@ define <8 x i1> @fcmp_ueq_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6226,12 +6112,11 @@ define <8 x i1> @fcmp_ugt_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-LABEL: fcmp_ugt_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -6242,12 +6127,11 @@ define <8 x i1> @fcmp_ugt_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6260,12 +6144,11 @@ define <8 x i1> @fcmp_ugt_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6277,12 +6160,11 @@ define <8 x i1> @fcmp_uge_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-LABEL: fcmp_uge_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -6293,12 +6175,11 @@ define <8 x i1> @fcmp_uge_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6311,12 +6192,11 @@ define <8 x i1> @fcmp_uge_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6328,12 +6208,11 @@ define <8 x i1> @fcmp_ult_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-LABEL: fcmp_ult_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -6344,12 +6223,11 @@ define <8 x i1> @fcmp_ult_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6362,12 +6240,11 @@ define <8 x i1> @fcmp_ult_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6379,12 +6256,11 @@ define <8 x i1> @fcmp_ule_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-LABEL: fcmp_ule_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -6395,12 +6271,11 @@ define <8 x i1> @fcmp_ule_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6413,12 +6288,11 @@ define <8 x i1> @fcmp_ule_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6464,9 +6338,9 @@ define <8 x i1> @fcmp_uno_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-LABEL: fcmp_uno_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v12, v10, v10
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vv v0, v10, v10
+; CHECK-NEXT:    vmfne.vv v2, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -6477,9 +6351,9 @@ define <8 x i1> @fcmp_uno_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vf v0, v10, fa0
+; CHECK-NEXT:    vmfne.vv v2, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6492,9 +6366,9 @@ define <8 x i1> @fcmp_uno_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v10
+; CHECK-NEXT:    vmfne.vf v0, v10, fa0
+; CHECK-NEXT:    vmfne.vv v2, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6541,12 +6415,10 @@ define <16 x i1> @fcmp_ogt_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-LABEL: fcmp_ogt_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -6557,12 +6429,10 @@ define <16 x i1> @fcmp_ogt_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6575,12 +6445,10 @@ define <16 x i1> @fcmp_ogt_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6592,12 +6460,10 @@ define <16 x i1> @fcmp_oge_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-LABEL: fcmp_oge_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -6608,12 +6474,10 @@ define <16 x i1> @fcmp_oge_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6626,12 +6490,10 @@ define <16 x i1> @fcmp_oge_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6643,12 +6505,10 @@ define <16 x i1> @fcmp_olt_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-LABEL: fcmp_olt_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -6659,12 +6519,10 @@ define <16 x i1> @fcmp_olt_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6677,12 +6535,10 @@ define <16 x i1> @fcmp_olt_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6694,12 +6550,10 @@ define <16 x i1> @fcmp_ole_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-LABEL: fcmp_ole_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -6710,12 +6564,10 @@ define <16 x i1> @fcmp_ole_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6728,12 +6580,10 @@ define <16 x i1> @fcmp_ole_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6745,14 +6595,13 @@ define <16 x i1> @fcmp_one_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-LABEL: fcmp_one_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vv v4, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -6763,14 +6612,13 @@ define <16 x i1> @fcmp_one_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6783,14 +6631,13 @@ define <16 x i1> @fcmp_one_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6802,9 +6649,9 @@ define <16 x i1> @fcmp_ord_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-LABEL: fcmp_ord_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -6815,9 +6662,9 @@ define <16 x i1> @fcmp_ord_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6830,9 +6677,9 @@ define <16 x i1> @fcmp_ord_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6844,14 +6691,13 @@ define <16 x i1> @fcmp_ueq_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-LABEL: fcmp_ueq_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vv v4, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -6862,14 +6708,13 @@ define <16 x i1> @fcmp_ueq_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6882,14 +6727,13 @@ define <16 x i1> @fcmp_ueq_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6901,12 +6745,11 @@ define <16 x i1> @fcmp_ugt_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-LABEL: fcmp_ugt_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -6917,12 +6760,11 @@ define <16 x i1> @fcmp_ugt_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6935,12 +6777,11 @@ define <16 x i1> @fcmp_ugt_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6952,12 +6793,11 @@ define <16 x i1> @fcmp_uge_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-LABEL: fcmp_uge_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -6968,12 +6808,11 @@ define <16 x i1> @fcmp_uge_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6986,12 +6825,11 @@ define <16 x i1> @fcmp_uge_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -7003,12 +6841,11 @@ define <16 x i1> @fcmp_ult_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-LABEL: fcmp_ult_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -7019,12 +6856,11 @@ define <16 x i1> @fcmp_ult_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -7037,12 +6873,11 @@ define <16 x i1> @fcmp_ult_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -7054,12 +6889,11 @@ define <16 x i1> @fcmp_ule_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-LABEL: fcmp_ule_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -7070,12 +6904,11 @@ define <16 x i1> @fcmp_ule_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -7088,12 +6921,11 @@ define <16 x i1> @fcmp_ule_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -7139,9 +6971,9 @@ define <16 x i1> @fcmp_uno_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-LABEL: fcmp_uno_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v16, v12, v12
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vv v0, v12, v12
+; CHECK-NEXT:    vmfne.vv v4, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -7152,9 +6984,9 @@ define <16 x i1> @fcmp_uno_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vf v0, v12, fa0
+; CHECK-NEXT:    vmfne.vv v4, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -7167,9 +6999,9 @@ define <16 x i1> @fcmp_uno_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v12
+; CHECK-NEXT:    vmfne.vf v0, v12, fa0
+; CHECK-NEXT:    vmfne.vv v4, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -8482,12 +8314,10 @@ define <4 x i1> @fcmp_ogt_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_ogt_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -8498,12 +8328,10 @@ define <4 x i1> @fcmp_ogt_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8516,12 +8344,10 @@ define <4 x i1> @fcmp_ogt_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8533,12 +8359,10 @@ define <4 x i1> @fcmp_oge_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_oge_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -8549,12 +8373,10 @@ define <4 x i1> @fcmp_oge_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8567,12 +8389,10 @@ define <4 x i1> @fcmp_oge_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8584,12 +8404,10 @@ define <4 x i1> @fcmp_olt_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_olt_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -8600,12 +8418,10 @@ define <4 x i1> @fcmp_olt_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8618,12 +8434,10 @@ define <4 x i1> @fcmp_olt_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8635,12 +8449,10 @@ define <4 x i1> @fcmp_ole_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_ole_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -8651,12 +8463,10 @@ define <4 x i1> @fcmp_ole_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8669,12 +8479,10 @@ define <4 x i1> @fcmp_ole_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8686,14 +8494,13 @@ define <4 x i1> @fcmp_one_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_one_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vv v2, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -8704,14 +8511,13 @@ define <4 x i1> @fcmp_one_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8724,14 +8530,13 @@ define <4 x i1> @fcmp_one_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8743,9 +8548,9 @@ define <4 x i1> @fcmp_ord_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_ord_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -8756,9 +8561,9 @@ define <4 x i1> @fcmp_ord_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8771,9 +8576,9 @@ define <4 x i1> @fcmp_ord_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8785,14 +8590,13 @@ define <4 x i1> @fcmp_ueq_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_ueq_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vv v2, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -8803,14 +8607,13 @@ define <4 x i1> @fcmp_ueq_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8823,14 +8626,13 @@ define <4 x i1> @fcmp_ueq_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8842,12 +8644,11 @@ define <4 x i1> @fcmp_ugt_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_ugt_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -8858,12 +8659,11 @@ define <4 x i1> @fcmp_ugt_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8876,12 +8676,11 @@ define <4 x i1> @fcmp_ugt_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8893,12 +8692,11 @@ define <4 x i1> @fcmp_uge_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_uge_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -8909,12 +8707,11 @@ define <4 x i1> @fcmp_uge_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8927,12 +8724,11 @@ define <4 x i1> @fcmp_uge_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8944,12 +8740,11 @@ define <4 x i1> @fcmp_ult_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_ult_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -8960,12 +8755,11 @@ define <4 x i1> @fcmp_ult_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8978,12 +8772,11 @@ define <4 x i1> @fcmp_ult_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8995,12 +8788,11 @@ define <4 x i1> @fcmp_ule_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_ule_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -9011,12 +8803,11 @@ define <4 x i1> @fcmp_ule_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -9029,12 +8820,11 @@ define <4 x i1> @fcmp_ule_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -9080,9 +8870,9 @@ define <4 x i1> @fcmp_uno_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_uno_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v12, v10, v10
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vv v0, v10, v10
+; CHECK-NEXT:    vmfne.vv v2, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -9093,9 +8883,9 @@ define <4 x i1> @fcmp_uno_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vf v0, v10, fa0
+; CHECK-NEXT:    vmfne.vv v2, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -9108,9 +8898,9 @@ define <4 x i1> @fcmp_uno_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v10
+; CHECK-NEXT:    vmfne.vf v0, v10, fa0
+; CHECK-NEXT:    vmfne.vv v2, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -9157,12 +8947,10 @@ define <8 x i1> @fcmp_ogt_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_ogt_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9173,12 +8961,10 @@ define <8 x i1> @fcmp_ogt_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9191,12 +8977,10 @@ define <8 x i1> @fcmp_ogt_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9208,12 +8992,10 @@ define <8 x i1> @fcmp_oge_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_oge_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9224,12 +9006,10 @@ define <8 x i1> @fcmp_oge_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9242,12 +9022,10 @@ define <8 x i1> @fcmp_oge_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9259,12 +9037,10 @@ define <8 x i1> @fcmp_olt_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_olt_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9275,12 +9051,10 @@ define <8 x i1> @fcmp_olt_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9293,12 +9067,10 @@ define <8 x i1> @fcmp_olt_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9310,12 +9082,10 @@ define <8 x i1> @fcmp_ole_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_ole_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9326,12 +9096,10 @@ define <8 x i1> @fcmp_ole_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9344,12 +9112,10 @@ define <8 x i1> @fcmp_ole_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9361,14 +9127,13 @@ define <8 x i1> @fcmp_one_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_one_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vv v4, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9379,14 +9144,13 @@ define <8 x i1> @fcmp_one_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9399,14 +9163,13 @@ define <8 x i1> @fcmp_one_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9418,9 +9181,9 @@ define <8 x i1> @fcmp_ord_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_ord_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9431,9 +9194,9 @@ define <8 x i1> @fcmp_ord_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9446,9 +9209,9 @@ define <8 x i1> @fcmp_ord_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9460,14 +9223,13 @@ define <8 x i1> @fcmp_ueq_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_ueq_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vv v4, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9478,14 +9240,13 @@ define <8 x i1> @fcmp_ueq_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9498,14 +9259,13 @@ define <8 x i1> @fcmp_ueq_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9517,12 +9277,11 @@ define <8 x i1> @fcmp_ugt_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_ugt_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9533,12 +9292,11 @@ define <8 x i1> @fcmp_ugt_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9551,12 +9309,11 @@ define <8 x i1> @fcmp_ugt_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9568,12 +9325,11 @@ define <8 x i1> @fcmp_uge_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_uge_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9584,12 +9340,11 @@ define <8 x i1> @fcmp_uge_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9602,12 +9357,11 @@ define <8 x i1> @fcmp_uge_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9619,12 +9373,11 @@ define <8 x i1> @fcmp_ult_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_ult_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9635,12 +9388,11 @@ define <8 x i1> @fcmp_ult_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9653,12 +9405,11 @@ define <8 x i1> @fcmp_ult_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9670,12 +9421,11 @@ define <8 x i1> @fcmp_ule_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_ule_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9686,12 +9436,11 @@ define <8 x i1> @fcmp_ule_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9704,12 +9453,11 @@ define <8 x i1> @fcmp_ule_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9755,9 +9503,9 @@ define <8 x i1> @fcmp_uno_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_uno_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v16, v12, v12
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vv v0, v12, v12
+; CHECK-NEXT:    vmfne.vv v4, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9768,9 +9516,9 @@ define <8 x i1> @fcmp_uno_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vf v0, v12, fa0
+; CHECK-NEXT:    vmfne.vv v4, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9783,9 +9531,9 @@ define <8 x i1> @fcmp_uno_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v12
+; CHECK-NEXT:    vmfne.vf v0, v12, fa0
+; CHECK-NEXT:    vmfne.vv v4, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll
index 83037baf3fabcf..e377e37b3a2bde 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll
@@ -2137,9 +2137,9 @@ define <16 x i1> @fcmps_oeq_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmps_oeq_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8
+; CHECK-NEXT:    vmfle.vv v2, v8, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2149,9 +2149,9 @@ define <16 x i1> @fcmps_oeq_vf_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_oeq_vf_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v2, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2163,9 +2163,9 @@ define <16 x i1> @fcmps_oeq_fv_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_oeq_fv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v2, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2313,9 +2313,9 @@ define <16 x i1> @fcmps_one_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmps_one_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10
+; CHECK-NEXT:    vmflt.vv v2, v10, v8
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2325,9 +2325,9 @@ define <16 x i1> @fcmps_one_vf_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_one_vf_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2339,9 +2339,9 @@ define <16 x i1> @fcmps_one_fv_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_one_fv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2353,9 +2353,9 @@ define <16 x i1> @fcmps_ord_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmps_ord_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v10
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2366,9 +2366,9 @@ define <16 x i1> @fcmps_ord_vf_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2381,9 +2381,9 @@ define <16 x i1> @fcmps_ord_fv_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2395,9 +2395,9 @@ define <16 x i1> @fcmps_ueq_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmps_ueq_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10
+; CHECK-NEXT:    vmflt.vv v2, v10, v8
+; CHECK-NEXT:    vmnor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2407,9 +2407,9 @@ define <16 x i1> @fcmps_ueq_vf_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_ueq_vf_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2421,9 +2421,9 @@ define <16 x i1> @fcmps_ueq_fv_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_ueq_fv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2435,8 +2435,8 @@ define <16 x i1> @fcmps_ugt_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmps_ugt_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v0, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2446,8 +2446,8 @@ define <16 x i1> @fcmps_ugt_vf_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_ugt_vf_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2459,8 +2459,8 @@ define <16 x i1> @fcmps_ugt_fv_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_ugt_fv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2472,8 +2472,8 @@ define <16 x i1> @fcmps_uge_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmps_uge_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2483,8 +2483,8 @@ define <16 x i1> @fcmps_uge_vf_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_uge_vf_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2496,8 +2496,8 @@ define <16 x i1> @fcmps_uge_fv_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_uge_fv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2509,8 +2509,8 @@ define <16 x i1> @fcmps_ult_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmps_ult_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2520,8 +2520,8 @@ define <16 x i1> @fcmps_ult_vf_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_ult_vf_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2533,8 +2533,8 @@ define <16 x i1> @fcmps_ult_fv_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_ult_fv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2546,8 +2546,8 @@ define <16 x i1> @fcmps_ule_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmps_ule_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v0, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2557,8 +2557,8 @@ define <16 x i1> @fcmps_ule_vf_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_ule_vf_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2570,8 +2570,8 @@ define <16 x i1> @fcmps_ule_fv_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_ule_fv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2583,9 +2583,9 @@ define <16 x i1> @fcmps_une_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmps_une_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8
+; CHECK-NEXT:    vmfle.vv v2, v8, v10
+; CHECK-NEXT:    vmnand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2595,9 +2595,9 @@ define <16 x i1> @fcmps_une_vf_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_une_vf_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v2, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2609,9 +2609,9 @@ define <16 x i1> @fcmps_une_fv_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_une_fv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v2, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2623,10 +2623,10 @@ define <16 x i1> @fcmps_uno_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmps_uno_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v10
-; CHECK-NEXT:    vmorn.mm v0, v8, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v10
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v2
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2637,10 +2637,10 @@ define <16 x i1> @fcmps_uno_vf_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v10
-; CHECK-NEXT:    vmorn.mm v0, v8, v12
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v2
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2653,10 +2653,10 @@ define <16 x i1> @fcmps_uno_fv_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmnot.m v10, v12
-; CHECK-NEXT:    vmfle.vv v11, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v10, v11
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmnot.m v10, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v10, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2670,9 +2670,9 @@ define <32 x i1> @fcmps_oeq_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8
+; CHECK-NEXT:    vmfle.vv v4, v8, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -2683,9 +2683,9 @@ define <32 x i1> @fcmps_oeq_vf_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v4, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -2698,9 +2698,9 @@ define <32 x i1> @fcmps_oeq_fv_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v4, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -2861,9 +2861,9 @@ define <32 x i1> @fcmps_one_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
+; CHECK-NEXT:    vmflt.vv v4, v12, v8
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -2874,9 +2874,9 @@ define <32 x i1> @fcmps_one_vf_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -2889,9 +2889,9 @@ define <32 x i1> @fcmps_one_fv_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -2904,9 +2904,9 @@ define <32 x i1> @fcmps_ord_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v12
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -2918,9 +2918,9 @@ define <32 x i1> @fcmps_ord_vf_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vf v0, v12, fa0
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -2934,9 +2934,9 @@ define <32 x i1> @fcmps_ord_fv_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfle.vf v0, v12, fa0
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -2949,9 +2949,9 @@ define <32 x i1> @fcmps_ueq_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
+; CHECK-NEXT:    vmflt.vv v4, v12, v8
+; CHECK-NEXT:    vmnor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -2962,9 +2962,9 @@ define <32 x i1> @fcmps_ueq_vf_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -2977,9 +2977,9 @@ define <32 x i1> @fcmps_ueq_fv_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -2992,8 +2992,8 @@ define <32 x i1> @fcmps_ugt_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v0, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3004,8 +3004,8 @@ define <32 x i1> @fcmps_ugt_vf_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3018,8 +3018,8 @@ define <32 x i1> @fcmps_ugt_fv_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3032,8 +3032,8 @@ define <32 x i1> @fcmps_uge_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3044,8 +3044,8 @@ define <32 x i1> @fcmps_uge_vf_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3058,8 +3058,8 @@ define <32 x i1> @fcmps_uge_fv_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3072,8 +3072,8 @@ define <32 x i1> @fcmps_ult_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3084,8 +3084,8 @@ define <32 x i1> @fcmps_ult_vf_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3098,8 +3098,8 @@ define <32 x i1> @fcmps_ult_fv_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3112,8 +3112,8 @@ define <32 x i1> @fcmps_ule_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v0, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3124,8 +3124,8 @@ define <32 x i1> @fcmps_ule_vf_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3138,8 +3138,8 @@ define <32 x i1> @fcmps_ule_fv_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3152,9 +3152,9 @@ define <32 x i1> @fcmps_une_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8
+; CHECK-NEXT:    vmfle.vv v4, v8, v12
+; CHECK-NEXT:    vmnand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3165,9 +3165,9 @@ define <32 x i1> @fcmps_une_vf_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v4, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3180,9 +3180,9 @@ define <32 x i1> @fcmps_une_fv_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v4, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3195,10 +3195,10 @@ define <32 x i1> @fcmps_uno_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v12
-; CHECK-NEXT:    vmorn.mm v0, v8, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v12
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v4
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3210,10 +3210,10 @@ define <32 x i1> @fcmps_uno_vf_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v12
-; CHECK-NEXT:    vmorn.mm v0, v8, v16
+; CHECK-NEXT:    vmfle.vf v0, v12, fa0
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v4
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3227,10 +3227,10 @@ define <32 x i1> @fcmps_uno_fv_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmfle.vf v8, v12, fa0
-; CHECK-NEXT:    vmnot.m v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v8, v16
+; CHECK-NEXT:    vmfle.vv v0, v8, v8
+; CHECK-NEXT:    vmfle.vf v4, v12, fa0
+; CHECK-NEXT:    vmnot.m v8, v4
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -4839,9 +4839,9 @@ define <8 x i1> @fcmps_oeq_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s
 ; CHECK-LABEL: fcmps_oeq_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8
+; CHECK-NEXT:    vmfle.vv v2, v8, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -4851,9 +4851,9 @@ define <8 x i1> @fcmps_oeq_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_oeq_vf_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v2, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -4865,9 +4865,9 @@ define <8 x i1> @fcmps_oeq_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_oeq_fv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v2, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5015,9 +5015,9 @@ define <8 x i1> @fcmps_one_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s
 ; CHECK-LABEL: fcmps_one_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10
+; CHECK-NEXT:    vmflt.vv v2, v10, v8
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5027,9 +5027,9 @@ define <8 x i1> @fcmps_one_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_one_vf_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5041,9 +5041,9 @@ define <8 x i1> @fcmps_one_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_one_fv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5055,9 +5055,9 @@ define <8 x i1> @fcmps_ord_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s
 ; CHECK-LABEL: fcmps_ord_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v10
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5068,9 +5068,9 @@ define <8 x i1> @fcmps_ord_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5083,9 +5083,9 @@ define <8 x i1> @fcmps_ord_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5097,9 +5097,9 @@ define <8 x i1> @fcmps_ueq_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s
 ; CHECK-LABEL: fcmps_ueq_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10
+; CHECK-NEXT:    vmflt.vv v2, v10, v8
+; CHECK-NEXT:    vmnor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5109,9 +5109,9 @@ define <8 x i1> @fcmps_ueq_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_ueq_vf_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5123,9 +5123,9 @@ define <8 x i1> @fcmps_ueq_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_ueq_fv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5137,8 +5137,8 @@ define <8 x i1> @fcmps_ugt_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s
 ; CHECK-LABEL: fcmps_ugt_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v0, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5148,8 +5148,8 @@ define <8 x i1> @fcmps_ugt_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_ugt_vf_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5161,8 +5161,8 @@ define <8 x i1> @fcmps_ugt_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_ugt_fv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5174,8 +5174,8 @@ define <8 x i1> @fcmps_uge_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s
 ; CHECK-LABEL: fcmps_uge_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5185,8 +5185,8 @@ define <8 x i1> @fcmps_uge_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_uge_vf_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5198,8 +5198,8 @@ define <8 x i1> @fcmps_uge_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_uge_fv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5211,8 +5211,8 @@ define <8 x i1> @fcmps_ult_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s
 ; CHECK-LABEL: fcmps_ult_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5222,8 +5222,8 @@ define <8 x i1> @fcmps_ult_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_ult_vf_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5235,8 +5235,8 @@ define <8 x i1> @fcmps_ult_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_ult_fv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5248,8 +5248,8 @@ define <8 x i1> @fcmps_ule_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s
 ; CHECK-LABEL: fcmps_ule_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v0, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5259,8 +5259,8 @@ define <8 x i1> @fcmps_ule_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_ule_vf_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5272,8 +5272,8 @@ define <8 x i1> @fcmps_ule_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_ule_fv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5285,9 +5285,9 @@ define <8 x i1> @fcmps_une_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s
 ; CHECK-LABEL: fcmps_une_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8
+; CHECK-NEXT:    vmfle.vv v2, v8, v10
+; CHECK-NEXT:    vmnand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5297,9 +5297,9 @@ define <8 x i1> @fcmps_une_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_une_vf_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v2, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5311,9 +5311,9 @@ define <8 x i1> @fcmps_une_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_une_fv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v2, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5325,10 +5325,10 @@ define <8 x i1> @fcmps_uno_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s
 ; CHECK-LABEL: fcmps_uno_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v10
-; CHECK-NEXT:    vmorn.mm v0, v8, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v10
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v2
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5339,10 +5339,10 @@ define <8 x i1> @fcmps_uno_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v10
-; CHECK-NEXT:    vmorn.mm v0, v8, v12
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v2
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5355,10 +5355,10 @@ define <8 x i1> @fcmps_uno_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmnot.m v10, v12
-; CHECK-NEXT:    vmfle.vv v11, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v10, v11
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmnot.m v10, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v10, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5371,9 +5371,9 @@ define <16 x i1> @fcmps_oeq_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi
 ; CHECK-LABEL: fcmps_oeq_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8
+; CHECK-NEXT:    vmfle.vv v4, v8, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -5383,9 +5383,9 @@ define <16 x i1> @fcmps_oeq_vf_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_oeq_vf_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v4, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5397,9 +5397,9 @@ define <16 x i1> @fcmps_oeq_fv_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_oeq_fv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v4, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5547,9 +5547,9 @@ define <16 x i1> @fcmps_one_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi
 ; CHECK-LABEL: fcmps_one_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
+; CHECK-NEXT:    vmflt.vv v4, v12, v8
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -5559,9 +5559,9 @@ define <16 x i1> @fcmps_one_vf_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_one_vf_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5573,9 +5573,9 @@ define <16 x i1> @fcmps_one_fv_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_one_fv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5587,9 +5587,9 @@ define <16 x i1> @fcmps_ord_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi
 ; CHECK-LABEL: fcmps_ord_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v12
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -5600,9 +5600,9 @@ define <16 x i1> @fcmps_ord_vf_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vf v0, v12, fa0
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5615,9 +5615,9 @@ define <16 x i1> @fcmps_ord_fv_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfle.vf v0, v12, fa0
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5629,9 +5629,9 @@ define <16 x i1> @fcmps_ueq_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi
 ; CHECK-LABEL: fcmps_ueq_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
+; CHECK-NEXT:    vmflt.vv v4, v12, v8
+; CHECK-NEXT:    vmnor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -5641,9 +5641,9 @@ define <16 x i1> @fcmps_ueq_vf_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_ueq_vf_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5655,9 +5655,9 @@ define <16 x i1> @fcmps_ueq_fv_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_ueq_fv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5669,8 +5669,8 @@ define <16 x i1> @fcmps_ugt_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi
 ; CHECK-LABEL: fcmps_ugt_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v0, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -5680,8 +5680,8 @@ define <16 x i1> @fcmps_ugt_vf_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_ugt_vf_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5693,8 +5693,8 @@ define <16 x i1> @fcmps_ugt_fv_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_ugt_fv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5706,8 +5706,8 @@ define <16 x i1> @fcmps_uge_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi
 ; CHECK-LABEL: fcmps_uge_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -5717,8 +5717,8 @@ define <16 x i1> @fcmps_uge_vf_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_uge_vf_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5730,8 +5730,8 @@ define <16 x i1> @fcmps_uge_fv_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_uge_fv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5743,8 +5743,8 @@ define <16 x i1> @fcmps_ult_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi
 ; CHECK-LABEL: fcmps_ult_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -5754,8 +5754,8 @@ define <16 x i1> @fcmps_ult_vf_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_ult_vf_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5767,8 +5767,8 @@ define <16 x i1> @fcmps_ult_fv_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_ult_fv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5780,8 +5780,8 @@ define <16 x i1> @fcmps_ule_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi
 ; CHECK-LABEL: fcmps_ule_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v0, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -5791,8 +5791,8 @@ define <16 x i1> @fcmps_ule_vf_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_ule_vf_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5804,8 +5804,8 @@ define <16 x i1> @fcmps_ule_fv_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_ule_fv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5817,9 +5817,9 @@ define <16 x i1> @fcmps_une_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi
 ; CHECK-LABEL: fcmps_une_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8
+; CHECK-NEXT:    vmfle.vv v4, v8, v12
+; CHECK-NEXT:    vmnand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -5829,9 +5829,9 @@ define <16 x i1> @fcmps_une_vf_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_une_vf_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v4, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5843,9 +5843,9 @@ define <16 x i1> @fcmps_une_fv_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_une_fv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v4, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5857,10 +5857,10 @@ define <16 x i1> @fcmps_uno_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi
 ; CHECK-LABEL: fcmps_uno_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v12
-; CHECK-NEXT:    vmorn.mm v0, v8, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v12
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v4
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -5871,10 +5871,10 @@ define <16 x i1> @fcmps_uno_vf_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v12
-; CHECK-NEXT:    vmorn.mm v0, v8, v16
+; CHECK-NEXT:    vmfle.vf v0, v12, fa0
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v4
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5887,10 +5887,10 @@ define <16 x i1> @fcmps_uno_fv_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmnot.m v12, v16
-; CHECK-NEXT:    vmfle.vv v13, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v12, v13
+; CHECK-NEXT:    vmfle.vf v0, v12, fa0
+; CHECK-NEXT:    vmnot.m v12, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v12, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6967,9 +6967,9 @@ define <4 x i1> @fcmps_oeq_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_oeq_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8
+; CHECK-NEXT:    vmfle.vv v2, v8, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -6979,9 +6979,9 @@ define <4 x i1> @fcmps_oeq_vf_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_oeq_vf_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v2, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -6993,9 +6993,9 @@ define <4 x i1> @fcmps_oeq_fv_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_oeq_fv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v2, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7143,9 +7143,9 @@ define <4 x i1> @fcmps_one_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_one_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10
+; CHECK-NEXT:    vmflt.vv v2, v10, v8
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -7155,9 +7155,9 @@ define <4 x i1> @fcmps_one_vf_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_one_vf_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7169,9 +7169,9 @@ define <4 x i1> @fcmps_one_fv_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_one_fv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7183,9 +7183,9 @@ define <4 x i1> @fcmps_ord_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_ord_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v10
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -7196,9 +7196,9 @@ define <4 x i1> @fcmps_ord_vf_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7211,9 +7211,9 @@ define <4 x i1> @fcmps_ord_fv_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7225,9 +7225,9 @@ define <4 x i1> @fcmps_ueq_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_ueq_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10
+; CHECK-NEXT:    vmflt.vv v2, v10, v8
+; CHECK-NEXT:    vmnor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -7237,9 +7237,9 @@ define <4 x i1> @fcmps_ueq_vf_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ueq_vf_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7251,9 +7251,9 @@ define <4 x i1> @fcmps_ueq_fv_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ueq_fv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7265,8 +7265,8 @@ define <4 x i1> @fcmps_ugt_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_ugt_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v0, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -7276,8 +7276,8 @@ define <4 x i1> @fcmps_ugt_vf_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ugt_vf_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7289,8 +7289,8 @@ define <4 x i1> @fcmps_ugt_fv_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ugt_fv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7302,8 +7302,8 @@ define <4 x i1> @fcmps_uge_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_uge_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -7313,8 +7313,8 @@ define <4 x i1> @fcmps_uge_vf_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_uge_vf_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7326,8 +7326,8 @@ define <4 x i1> @fcmps_uge_fv_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_uge_fv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7339,8 +7339,8 @@ define <4 x i1> @fcmps_ult_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_ult_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -7350,8 +7350,8 @@ define <4 x i1> @fcmps_ult_vf_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ult_vf_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7363,8 +7363,8 @@ define <4 x i1> @fcmps_ult_fv_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ult_fv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7376,8 +7376,8 @@ define <4 x i1> @fcmps_ule_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_ule_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v0, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -7387,8 +7387,8 @@ define <4 x i1> @fcmps_ule_vf_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ule_vf_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7400,8 +7400,8 @@ define <4 x i1> @fcmps_ule_fv_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ule_fv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7413,9 +7413,9 @@ define <4 x i1> @fcmps_une_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_une_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8
+; CHECK-NEXT:    vmfle.vv v2, v8, v10
+; CHECK-NEXT:    vmnand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -7425,9 +7425,9 @@ define <4 x i1> @fcmps_une_vf_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_une_vf_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v2, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7439,9 +7439,9 @@ define <4 x i1> @fcmps_une_fv_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_une_fv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v2, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7453,10 +7453,10 @@ define <4 x i1> @fcmps_uno_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_uno_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v10
-; CHECK-NEXT:    vmorn.mm v0, v8, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v10
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v2
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -7467,10 +7467,10 @@ define <4 x i1> @fcmps_uno_vf_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v10
-; CHECK-NEXT:    vmorn.mm v0, v8, v12
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v2
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7483,10 +7483,10 @@ define <4 x i1> @fcmps_uno_fv_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmnot.m v10, v12
-; CHECK-NEXT:    vmfle.vv v11, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v10, v11
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmnot.m v10, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v10, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7499,9 +7499,9 @@ define <8 x i1> @fcmps_oeq_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_oeq_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8
+; CHECK-NEXT:    vmfle.vv v4, v8, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -7511,9 +7511,9 @@ define <8 x i1> @fcmps_oeq_vf_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_oeq_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v4, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7525,9 +7525,9 @@ define <8 x i1> @fcmps_oeq_fv_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_oeq_fv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v4, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7675,9 +7675,9 @@ define <8 x i1> @fcmps_one_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_one_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
+; CHECK-NEXT:    vmflt.vv v4, v12, v8
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -7687,9 +7687,9 @@ define <8 x i1> @fcmps_one_vf_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_one_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7701,9 +7701,9 @@ define <8 x i1> @fcmps_one_fv_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_one_fv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7715,9 +7715,9 @@ define <8 x i1> @fcmps_ord_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_ord_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v12
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -7728,9 +7728,9 @@ define <8 x i1> @fcmps_ord_vf_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vf v0, v12, fa0
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7743,9 +7743,9 @@ define <8 x i1> @fcmps_ord_fv_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfle.vf v0, v12, fa0
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7757,9 +7757,9 @@ define <8 x i1> @fcmps_ueq_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_ueq_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
+; CHECK-NEXT:    vmflt.vv v4, v12, v8
+; CHECK-NEXT:    vmnor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -7769,9 +7769,9 @@ define <8 x i1> @fcmps_ueq_vf_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ueq_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7783,9 +7783,9 @@ define <8 x i1> @fcmps_ueq_fv_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ueq_fv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7797,8 +7797,8 @@ define <8 x i1> @fcmps_ugt_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_ugt_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v0, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -7808,8 +7808,8 @@ define <8 x i1> @fcmps_ugt_vf_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ugt_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7821,8 +7821,8 @@ define <8 x i1> @fcmps_ugt_fv_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ugt_fv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7834,8 +7834,8 @@ define <8 x i1> @fcmps_uge_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_uge_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -7845,8 +7845,8 @@ define <8 x i1> @fcmps_uge_vf_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_uge_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7858,8 +7858,8 @@ define <8 x i1> @fcmps_uge_fv_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_uge_fv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7871,8 +7871,8 @@ define <8 x i1> @fcmps_ult_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_ult_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -7882,8 +7882,8 @@ define <8 x i1> @fcmps_ult_vf_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ult_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7895,8 +7895,8 @@ define <8 x i1> @fcmps_ult_fv_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ult_fv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7908,8 +7908,8 @@ define <8 x i1> @fcmps_ule_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_ule_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v0, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -7919,8 +7919,8 @@ define <8 x i1> @fcmps_ule_vf_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ule_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7932,8 +7932,8 @@ define <8 x i1> @fcmps_ule_fv_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ule_fv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7945,9 +7945,9 @@ define <8 x i1> @fcmps_une_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_une_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8
+; CHECK-NEXT:    vmfle.vv v4, v8, v12
+; CHECK-NEXT:    vmnand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -7957,9 +7957,9 @@ define <8 x i1> @fcmps_une_vf_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_une_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v4, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7971,9 +7971,9 @@ define <8 x i1> @fcmps_une_fv_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_une_fv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v4, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7985,10 +7985,10 @@ define <8 x i1> @fcmps_uno_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_uno_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v12
-; CHECK-NEXT:    vmorn.mm v0, v8, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v12
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v4
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -7999,10 +7999,10 @@ define <8 x i1> @fcmps_uno_vf_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v12
-; CHECK-NEXT:    vmorn.mm v0, v8, v16
+; CHECK-NEXT:    vmfle.vf v0, v12, fa0
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v4
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -8015,10 +8015,10 @@ define <8 x i1> @fcmps_uno_fv_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmnot.m v12, v16
-; CHECK-NEXT:    vmfle.vv v13, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v12, v13
+; CHECK-NEXT:    vmfle.vf v0, v12, fa0
+; CHECK-NEXT:    vmnot.m v12, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v12, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
index 466448a7a05a21..e3ab3b6da25078 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -26,8 +26,8 @@ define <4 x i1> @vpmerge_vv_v4i1(<4 x i1> %va, <4 x i1> %vb, <4 x i1> %m, i32 ze
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV64-NEXT:    vid.v v10
-; RV64-NEXT:    vmsltu.vx v12, v10, a0
-; RV64-NEXT:    vmand.mm v9, v9, v12
+; RV64-NEXT:    vmsltu.vx v2, v10, a0
+; RV64-NEXT:    vmand.mm v9, v9, v2
 ; RV64-NEXT:    vmandn.mm v8, v8, v9
 ; RV64-NEXT:    vmand.mm v9, v0, v9
 ; RV64-NEXT:    vmor.mm v0, v9, v8
@@ -48,8 +48,8 @@ define <4 x i1> @vpmerge_vv_v4i1(<4 x i1> %va, <4 x i1> %vb, <4 x i1> %m, i32 ze
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV64ZVFHMIN-NEXT:    vid.v v10
-; RV64ZVFHMIN-NEXT:    vmsltu.vx v12, v10, a0
-; RV64ZVFHMIN-NEXT:    vmand.mm v9, v9, v12
+; RV64ZVFHMIN-NEXT:    vmsltu.vx v2, v10, a0
+; RV64ZVFHMIN-NEXT:    vmand.mm v9, v9, v2
 ; RV64ZVFHMIN-NEXT:    vmandn.mm v8, v8, v9
 ; RV64ZVFHMIN-NEXT:    vmand.mm v9, v0, v9
 ; RV64ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
index d05f580ea7d222..a454e1dbc967c2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
@@ -456,15 +456,29 @@ define <32 x i64> @select_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32
 define <32 x i64> @select_evl_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c) {
 ; CHECK-LABEL: select_evl_v32i64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle64.v v24, (a0)
-; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT:    addi a0, a0, 128
-; CHECK-NEXT:    vle64.v v24, (a0)
+; CHECK-NEXT:    vle64.v v16, (a0)
+; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v0, v0, 2
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m8, ta, ma
-; CHECK-NEXT:    vmerge.vvm v16, v24, v16, v0
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %v = call <32 x i64> @llvm.vp.select.v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 17)
   ret <32 x i64> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
index 9c4706b2bda764..b5346b355743ca 100644
--- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
@@ -135,21 +135,19 @@ declare <vscale x 8 x half> @llvm.vp.floor.nxv8f16(<vscale x 8 x half>, <vscale
 define <vscale x 8 x half> @vp_floor_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv8f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI6_0)
 ; CHECK-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.vp.floor.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x half> %v
@@ -179,21 +177,19 @@ declare <vscale x 16 x half> @llvm.vp.floor.nxv16f16(<vscale x 16 x half>, <vsca
 define <vscale x 16 x half> @vp_floor_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv16f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI8_0)
 ; CHECK-NEXT:    flh fa5, %lo(.LCPI8_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.floor.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x half> %v
@@ -223,21 +219,19 @@ declare <vscale x 32 x half> @llvm.vp.floor.nxv32f16(<vscale x 32 x half>, <vsca
 define <vscale x 32 x half> @vp_floor_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv32f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI10_0)
 ; CHECK-NEXT:    flh fa5, %lo(.LCPI10_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 32 x half> @llvm.vp.floor.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
   ret <vscale x 32 x half> %v
@@ -351,21 +345,19 @@ declare <vscale x 4 x float> @llvm.vp.floor.nxv4f32(<vscale x 4 x float>, <vscal
 define <vscale x 4 x float> @vp_floor_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.floor.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x float> %v
@@ -395,21 +387,19 @@ declare <vscale x 8 x float> @llvm.vp.floor.nxv8f32(<vscale x 8 x float>, <vscal
 define <vscale x 8 x float> @vp_floor_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.floor.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x float> %v
@@ -439,21 +429,19 @@ declare <vscale x 16 x float> @llvm.vp.floor.nxv16f32(<vscale x 16 x float>, <vs
 define <vscale x 16 x float> @vp_floor_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x float> @llvm.vp.floor.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x float> %v
@@ -525,21 +513,19 @@ declare <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double>, <vsc
 define <vscale x 2 x double> @vp_floor_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -569,21 +555,19 @@ declare <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double>, <vsc
 define <vscale x 4 x double> @vp_floor_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
@@ -613,21 +597,19 @@ declare <vscale x 7 x double> @llvm.vp.floor.nxv7f64(<vscale x 7 x double>, <vsc
 define <vscale x 7 x double> @vp_floor_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv7f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI28_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI28_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.floor.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
@@ -657,21 +639,19 @@ declare <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double>, <vsc
 define <vscale x 8 x double> @vp_floor_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI30_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI30_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
@@ -714,7 +694,7 @@ define <vscale x 16 x double> @vp_floor_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    srli a2, a1, 3
 ; CHECK-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vslidedown.vx v25, v0, a2
+; CHECK-NEXT:    vslidedown.vx v0, v0, a2
 ; CHECK-NEXT:    sub a2, a0, a1
 ; CHECK-NEXT:    sltu a3, a0, a2
 ; CHECK-NEXT:    addi a3, a3, -1
@@ -722,13 +702,11 @@ define <vscale x 16 x double> @vp_floor_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:    lui a3, %hi(.LCPI32_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI32_0)(a3)
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfabs.v v8, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v8, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a2, 2
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
 ; CHECK-NEXT:    fsrm a2
 ; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
index 386f23f68c357e..13107103a5ecfb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
@@ -103,11 +103,11 @@ define <vscale x 4 x half> @vfmax_nxv4f16_vv(<vscale x 4 x half> %a, <vscale x 4
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v10
-; ZVFHMIN-NEXT:    vmerge.vvm v14, v12, v10, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
-; ZVFHMIN-NEXT:    vfmax.vv v10, v8, v14
+; ZVFHMIN-NEXT:    vmfeq.vv v2, v10, v10
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v10, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v2
+; ZVFHMIN-NEXT:    vmerge.vvm v10, v10, v12, v0
+; ZVFHMIN-NEXT:    vfmax.vv v10, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -122,11 +122,11 @@ define <vscale x 8 x half> @vfmax_nxv8f16_vv(<vscale x 8 x half> %a, <vscale x 8
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFH-NEXT:    vmfeq.vv v12, v10, v10
-; ZVFH-NEXT:    vmerge.vvm v14, v8, v10, v0
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmfeq.vv v2, v10, v10
+; ZVFH-NEXT:    vmerge.vvm v12, v8, v10, v0
+; ZVFH-NEXT:    vmv1r.v v0, v2
 ; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
-; ZVFH-NEXT:    vfmax.vv v8, v8, v14
+; ZVFH-NEXT:    vfmax.vv v8, v8, v12
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfmax_nxv8f16_vv:
@@ -136,11 +136,11 @@ define <vscale x 8 x half> @vfmax_nxv8f16_vv(<vscale x 8 x half> %a, <vscale x 8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v20, v16, v12, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
-; ZVFHMIN-NEXT:    vfmax.vv v12, v8, v20
+; ZVFHMIN-NEXT:    vmfeq.vv v4, v12, v12
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v12, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v4
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v12, v16, v0
+; ZVFHMIN-NEXT:    vfmax.vv v12, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -155,11 +155,11 @@ define <vscale x 16 x half> @vfmax_nxv16f16_vv(<vscale x 16 x half> %a, <vscale
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFH-NEXT:    vmfeq.vv v16, v12, v12
-; ZVFH-NEXT:    vmerge.vvm v20, v8, v12, v0
-; ZVFH-NEXT:    vmv1r.v v0, v16
+; ZVFH-NEXT:    vmfeq.vv v4, v12, v12
+; ZVFH-NEXT:    vmerge.vvm v16, v8, v12, v0
+; ZVFH-NEXT:    vmv1r.v v0, v4
 ; ZVFH-NEXT:    vmerge.vvm v8, v12, v8, v0
-; ZVFH-NEXT:    vfmax.vv v8, v8, v20
+; ZVFH-NEXT:    vfmax.vv v8, v8, v16
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfmax_nxv16f16_vv:
@@ -167,26 +167,36 @@ define <vscale x 16 x half> @vfmax_nxv16f16_vv(<vscale x 16 x half> %a, <vscale
 ; ZVFHMIN-NEXT:    addi sp, sp, -16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    li a1, 10
+; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    sub sp, sp, a0
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v24, v24
-; ZVFHMIN-NEXT:    vmfeq.vv v7, v16, v16
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v16, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v16, v0
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v24, v0
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vfmax.vv v16, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    li a1, 10
+; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
@@ -199,23 +209,35 @@ declare <vscale x 32 x half> @llvm.maximum.nxv32f16(<vscale x 32 x half>, <vscal
 define <vscale x 32 x half> @vfmax_nxv32f16_vv(<vscale x 32 x half> %a, <vscale x 32 x half> %b) nounwind {
 ; ZVFH-LABEL: vfmax_nxv32f16_vv:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    addi sp, sp, -16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 1
+; ZVFH-NEXT:    sub sp, sp, a0
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFH-NEXT:    vmfeq.vv v7, v16, v16
+; ZVFH-NEXT:    vmfeq.vv v24, v16, v16
+; ZVFH-NEXT:    addi a0, sp, 16
+; ZVFH-NEXT:    vs1r.v v24, (a0) # Unknown-size Folded Spill
 ; ZVFH-NEXT:    vmerge.vvm v24, v8, v16, v0
-; ZVFH-NEXT:    vmv1r.v v0, v7
+; ZVFH-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; ZVFH-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; ZVFH-NEXT:    vfmax.vv v8, v8, v24
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 1
+; ZVFH-NEXT:    add sp, sp, a0
+; ZVFH-NEXT:    addi sp, sp, 16
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfmax_nxv32f16_vv:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    addi sp, sp, -16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    li a1, 18
+; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    sub sp, sp, a0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a1, a0, 3
+; ZVFHMIN-NEXT:    add a0, a1, a0
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
@@ -225,45 +247,63 @@ define <vscale x 32 x half> @vfmax_nxv32f16_vv(<vscale x 32 x half> %a, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFHMIN-NEXT:    vmfeq.vv v3, v24, v24
+; ZVFHMIN-NEXT:    vmfeq.vv v16, v24, v24
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vs1r.v v16, (a0) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    vmerge.vvm v16, v8, v24, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v3
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v8, v0
-; ZVFHMIN-NEXT:    vfmax.vv v24, v8, v16
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v16, v8
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v4
+; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a1, a0, 3
+; ZVFHMIN-NEXT:    add a0, a1, a0
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
-; ZVFHMIN-NEXT:    vmfeq.vv v7, v8, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v24, v16, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v16, v24, v24
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vs1r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v8, v24, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a1, a0, 3
+; ZVFHMIN-NEXT:    add a0, a1, a0
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v8, v16, v0
+; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v8, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a1, a0, 3
+; ZVFHMIN-NEXT:    add a0, a1, a0
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v24
+; ZVFHMIN-NEXT:    vfmax.vv v16, v8, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    li a1, 18
+; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
@@ -312,11 +352,11 @@ define <vscale x 4 x float> @vfmax_nxv4f32_vv(<vscale x 4 x float> %a, <vscale x
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmerge.vvm v12, v8, v10, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT:    vfmax.vv v8, v8, v14
+; CHECK-NEXT:    vfmax.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.maximum.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
   ret <vscale x 4 x float> %v
@@ -329,11 +369,11 @@ define <vscale x 8 x float> @vfmax_nxv8f32_vv(<vscale x 8 x float> %a, <vscale x
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmerge.vvm v16, v8, v12, v0
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
-; CHECK-NEXT:    vfmax.vv v8, v8, v20
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.maximum.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b)
   ret <vscale x 8 x float> %v
@@ -344,13 +384,23 @@ declare <vscale x 16 x float> @llvm.maximum.nxv16f32(<vscale x 16 x float>, <vsc
 define <vscale x 16 x float> @vfmax_nxv16f32_vv(<vscale x 16 x float> %a, <vscale x 16 x float> %b) nounwind {
 ; CHECK-LABEL: vfmax_nxv16f32_vv:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    sub sp, sp, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v7, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vfmax.vv v8, v8, v24
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x float> @llvm.maximum.nxv16f32(<vscale x 16 x float> %a, <vscale x 16 x float> %b)
   ret <vscale x 16 x float> %v
@@ -380,11 +430,11 @@ define <vscale x 2 x double> @vfmax_nxv2f64_vv(<vscale x 2 x double> %a, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmerge.vvm v12, v8, v10, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT:    vfmax.vv v8, v8, v14
+; CHECK-NEXT:    vfmax.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.maximum.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
   ret <vscale x 2 x double> %v
@@ -397,11 +447,11 @@ define <vscale x 4 x double> @vfmax_nxv4f64_vv(<vscale x 4 x double> %a, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmerge.vvm v16, v8, v12, v0
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
-; CHECK-NEXT:    vfmax.vv v8, v8, v20
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.maximum.nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b)
   ret <vscale x 4 x double> %v
@@ -412,13 +462,23 @@ declare <vscale x 8 x double> @llvm.maximum.nxv8f64(<vscale x 8 x double>, <vsca
 define <vscale x 8 x double> @vfmax_nxv8f64_vv(<vscale x 8 x double> %a, <vscale x 8 x double> %b) nounwind {
 ; CHECK-LABEL: vfmax_nxv8f64_vv:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    sub sp, sp, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v7, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vfmax.vv v8, v8, v24
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.maximum.nxv8f64(<vscale x 8 x double> %a, <vscale x 8 x double> %b)
   ret <vscale x 8 x double> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll
index b78b8663eac90b..9f5cfe0e533d31 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll
@@ -174,18 +174,16 @@ define <vscale x 4 x half> @vfmax_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v12, v14, v0
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v14, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v14, v14, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v14, v12, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v14, v14, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v14, v12, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfmax.vv v10, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v10, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -214,11 +212,11 @@ define <vscale x 4 x half> @vfmax_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, <
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v14, v10, v12, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v10, v0
-; ZVFHMIN-NEXT:    vfmax.vv v10, v8, v14
+; ZVFHMIN-NEXT:    vmfeq.vv v2, v12, v12
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v2
+; ZVFHMIN-NEXT:    vmerge.vvm v10, v12, v10, v0
+; ZVFHMIN-NEXT:    vfmax.vv v10, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -233,12 +231,10 @@ define <vscale x 8 x half> @vfmax_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v12, v0
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v13
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v12
-; ZVFH-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v13
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vfmax.vv v8, v8, v14, v0.t
@@ -250,18 +246,16 @@ define <vscale x 8 x half> @vfmax_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v24, v16, v20, v0
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v20, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v20, v20, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v20, v16, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v20, v20, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v20, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vfmax.vv v12, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v12, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -274,11 +268,11 @@ define <vscale x 8 x half> @vfmax_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, <
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFH-NEXT:    vmfeq.vv v12, v10, v10
-; ZVFH-NEXT:    vmerge.vvm v14, v8, v10, v0
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmfeq.vv v2, v10, v10
+; ZVFH-NEXT:    vmerge.vvm v12, v8, v10, v0
+; ZVFH-NEXT:    vmv1r.v v0, v2
 ; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
-; ZVFH-NEXT:    vfmax.vv v8, v8, v14
+; ZVFH-NEXT:    vfmax.vv v8, v8, v12
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfmax_vv_nxv8f16_unmasked:
@@ -290,11 +284,11 @@ define <vscale x 8 x half> @vfmax_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, <
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16
-; ZVFHMIN-NEXT:    vmerge.vvm v20, v12, v16, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v12, v0
-; ZVFHMIN-NEXT:    vfmax.vv v12, v8, v20
+; ZVFHMIN-NEXT:    vmfeq.vv v4, v16, v16
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v4
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v16, v12, v0
+; ZVFHMIN-NEXT:    vfmax.vv v12, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -309,12 +303,10 @@ define <vscale x 16 x half> @vfmax_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v16, v0
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT:    vmfeq.vv v17, v8, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v17
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v20, v8, v12, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v16
-; ZVFH-NEXT:    vmfeq.vv v17, v12, v12, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v17
+; ZVFH-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v16
 ; ZVFH-NEXT:    vfmax.vv v8, v8, v20, v0.t
@@ -322,37 +314,22 @@ define <vscale x 16 x half> @vfmax_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ;
 ; ZVFHMIN-LABEL: vfmax_vv_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
-; ZVFHMIN-NEXT:    sub sp, sp, a1
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFHMIN-NEXT:    vmv1r.v v7, v0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v24, v24, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v24, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v16, v0
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v24, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfmax.vv v16, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add sp, sp, a0
-; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.maximum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x half> %v
@@ -363,11 +340,11 @@ define <vscale x 16 x half> @vfmax_vv_nxv16f16_unmasked(<vscale x 16 x half> %va
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFH-NEXT:    vmfeq.vv v16, v12, v12
-; ZVFH-NEXT:    vmerge.vvm v20, v8, v12, v0
-; ZVFH-NEXT:    vmv1r.v v0, v16
+; ZVFH-NEXT:    vmfeq.vv v4, v12, v12
+; ZVFH-NEXT:    vmerge.vvm v16, v8, v12, v0
+; ZVFH-NEXT:    vmv1r.v v0, v4
 ; ZVFH-NEXT:    vmerge.vvm v8, v12, v8, v0
-; ZVFH-NEXT:    vfmax.vv v8, v8, v20
+; ZVFH-NEXT:    vfmax.vv v8, v8, v16
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfmax_vv_nxv16f16_unmasked:
@@ -375,9 +352,10 @@ define <vscale x 16 x half> @vfmax_vv_nxv16f16_unmasked(<vscale x 16 x half> %va
 ; ZVFHMIN-NEXT:    addi sp, sp, -16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    li a2, 10
+; ZVFHMIN-NEXT:    mul a1, a1, a2
 ; ZVFHMIN-NEXT:    sub sp, sp, a1
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
@@ -385,18 +363,27 @@ define <vscale x 16 x half> @vfmax_vv_nxv16f16_unmasked(<vscale x 16 x half> %va
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v7, v24, v24
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v24, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v8, v24, v24
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v24, v0
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v16, v0
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vfmax.vv v16, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    li a1, 10
+; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
@@ -417,14 +404,12 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFH-NEXT:    vmv1r.v v7, v0
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v25
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v24, v8, v16, v0
 ; ZVFH-NEXT:    addi a0, sp, 16
 ; ZVFH-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; ZVFH-NEXT:    vmv1r.v v0, v7
-; ZVFH-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v25
+; ZVFH-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v7
 ; ZVFH-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
@@ -440,17 +425,11 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    addi sp, sp, -16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    li a2, 34
+; ZVFHMIN-NEXT:    li a2, 24
 ; ZVFHMIN-NEXT:    mul a1, a1, a2
 ; ZVFHMIN-NEXT:    sub sp, sp, a1
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    li a2, 25
-; ZVFHMIN-NEXT:    mul a1, a1, a2
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vmv8r.v v16, v8
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; ZVFHMIN-NEXT:    vmv1r.v v7, v0
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
@@ -459,135 +438,68 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    and a3, a4, a3
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    csrr a4, vlenb
-; ZVFHMIN-NEXT:    li a5, 24
-; ZVFHMIN-NEXT:    mul a4, a4, a5
-; ZVFHMIN-NEXT:    add a4, sp, a4
-; ZVFHMIN-NEXT:    addi a4, a4, 16
-; ZVFHMIN-NEXT:    vs1r.v v0, (a4) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vslidedown.vx v0, v0, a2
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs1r.v v0, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vslidedown.vx v6, v0, a2
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 4
+; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v12, v24, v24, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v6
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v24, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    li a4, 25
-; ZVFHMIN-NEXT:    mul a2, a2, a4
+; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v0, (a2) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v4
+; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v16, v0
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl1r.v v8, (a2) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmfeq.vv v12, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v24, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v24, v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v6
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v24, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v6
+; ZVFHMIN-NEXT:    vfmax.vv v16, v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
+; ZVFHMIN-NEXT:    addi a2, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB10_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB10_2:
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 4
+; ZVFHMIN-NEXT:    slli a1, a1, 3
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    li a2, 24
-; ZVFHMIN-NEXT:    mul a1, a1, a2
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vl1r.v v0, (a1) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v24, v24, v0.t
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 4
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    addi a1, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    li a2, 25
-; ZVFHMIN-NEXT:    mul a1, a1, a2
+; ZVFHMIN-NEXT:    slli a1, a1, 4
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v0
+; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vmerge.vvm v24, v24, v16, v0
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 25
-; ZVFHMIN-NEXT:    mul a0, a0, a1
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 24
-; ZVFHMIN-NEXT:    mul a0, a0, a1
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 25
-; ZVFHMIN-NEXT:    mul a0, a0, a1
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vmerge.vvm v24, v8, v16, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v7
 ; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 34
+; ZVFHMIN-NEXT:    li a1, 24
 ; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
@@ -599,13 +511,25 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 define <vscale x 32 x half> @vfmax_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, i32 zeroext %evl) {
 ; ZVFH-LABEL: vfmax_vv_nxv32f16_unmasked:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    addi sp, sp, -16
+; ZVFH-NEXT:    .cfi_def_cfa_offset 16
+; ZVFH-NEXT:    csrr a1, vlenb
+; ZVFH-NEXT:    slli a1, a1, 1
+; ZVFH-NEXT:    sub sp, sp, a1
+; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFH-NEXT:    vmfeq.vv v7, v16, v16
+; ZVFH-NEXT:    vmfeq.vv v24, v16, v16
+; ZVFH-NEXT:    addi a0, sp, 16
+; ZVFH-NEXT:    vs1r.v v24, (a0) # Unknown-size Folded Spill
 ; ZVFH-NEXT:    vmerge.vvm v24, v8, v16, v0
-; ZVFH-NEXT:    vmv1r.v v0, v7
+; ZVFH-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; ZVFH-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; ZVFH-NEXT:    vfmax.vv v8, v8, v24
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 1
+; ZVFH-NEXT:    add sp, sp, a0
+; ZVFH-NEXT:    addi sp, sp, 16
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfmax_vv_nxv32f16_unmasked:
@@ -613,9 +537,9 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    addi sp, sp, -16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 5
+; ZVFHMIN-NEXT:    slli a1, a1, 4
 ; ZVFHMIN-NEXT:    sub sp, sp, a1
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
@@ -628,80 +552,64 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vslidedown.vx v7, v24, a2
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 4
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
+; ZVFHMIN-NEXT:    addi a2, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vmfeq.vv v12, v24, v24, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v24, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv4r.v v8, v16
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    li a4, 24
-; ZVFHMIN-NEXT:    mul a2, a2, a4
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20
-; ZVFHMIN-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v24, v0
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
+; ZVFHMIN-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v24, v8, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vmfeq.vv v12, v24, v24, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v24, v16, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v24, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v8, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v16, v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v4, v16
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB11_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB11_2:
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 4
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
+; ZVFHMIN-NEXT:    addi a1, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    li a2, 24
-; ZVFHMIN-NEXT:    mul a1, a1, a2
+; ZVFHMIN-NEXT:    slli a1, a1, 3
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v3, v16, v16
+; ZVFHMIN-NEXT:    vmfeq.vv v24, v16, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vs1r.v v24, (a0) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    vmerge.vvm v24, v8, v16, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v3
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v8, v0
 ; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v24
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v0, v16
 ; ZVFHMIN-NEXT:    vmv8r.v v8, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 5
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
@@ -784,12 +692,10 @@ define <vscale x 4 x float> @vfmax_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vfmax.vv v8, v8, v14, v0.t
@@ -803,11 +709,11 @@ define <vscale x 4 x float> @vfmax_vv_nxv4f32_unmasked(<vscale x 4 x float> %va,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmerge.vvm v12, v8, v10, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT:    vfmax.vv v8, v8, v14
+; CHECK-NEXT:    vfmax.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.maximum.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x float> %v
@@ -820,12 +726,10 @@ define <vscale x 8 x float> @vfmax_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vfmax.vv v8, v8, v20, v0.t
@@ -839,11 +743,11 @@ define <vscale x 8 x float> @vfmax_vv_nxv8f32_unmasked(<vscale x 8 x float> %va,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmerge.vvm v16, v8, v12, v0
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
-; CHECK-NEXT:    vfmax.vv v8, v8, v20
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.maximum.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x float> %v
@@ -890,12 +794,10 @@ define <vscale x 2 x double> @vfmax_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vfmax.vv v8, v8, v14, v0.t
@@ -909,11 +811,11 @@ define <vscale x 2 x double> @vfmax_vv_nxv2f64_unmasked(<vscale x 2 x double> %v
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmerge.vvm v12, v8, v10, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT:    vfmax.vv v8, v8, v14
+; CHECK-NEXT:    vfmax.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.maximum.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x double> %v
@@ -926,12 +828,10 @@ define <vscale x 4 x double> @vfmax_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vfmax.vv v8, v8, v20, v0.t
@@ -945,11 +845,11 @@ define <vscale x 4 x double> @vfmax_vv_nxv4f64_unmasked(<vscale x 4 x double> %v
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmerge.vvm v16, v8, v12, v0
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
-; CHECK-NEXT:    vfmax.vv v8, v8, v20
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.maximum.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x double> %v
@@ -968,14 +868,12 @@ define <vscale x 8 x double> @vfmax_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    vmv1r.v v7, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
@@ -992,13 +890,25 @@ define <vscale x 8 x double> @vfmax_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 define <vscale x 8 x double> @vfmax_vv_nxv8f64_unmasked(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, i32 zeroext %evl) {
 ; CHECK-LABEL: vfmax_vv_nxv8f64_unmasked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v7, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vfmax.vv v8, v8, v24
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.maximum.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x double> %v
@@ -1012,23 +922,24 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    li a3, 42
+; CHECK-NEXT:    li a3, 26
 ; CHECK-NEXT:    mul a1, a1, a3
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a3, a1, 3
 ; CHECK-NEXT:    add a3, a0, a3
 ; CHECK-NEXT:    vl8re64.v v24, (a3)
 ; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    slli a3, a3, 4
+; CHECK-NEXT:    slli a4, a3, 4
+; CHECK-NEXT:    add a3, a4, a3
 ; CHECK-NEXT:    add a3, sp, a3
 ; CHECK-NEXT:    addi a3, a3, 16
 ; CHECK-NEXT:    vs8r.v v24, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    srli a3, a1, 3
 ; CHECK-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    csrr a4, vlenb
-; CHECK-NEXT:    slli a4, a4, 5
+; CHECK-NEXT:    slli a4, a4, 3
 ; CHECK-NEXT:    add a4, sp, a4
 ; CHECK-NEXT:    addi a4, a4, 16
 ; CHECK-NEXT:    vs1r.v v0, (a4) # Unknown-size Folded Spill
@@ -1039,7 +950,7 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:    and a3, a4, a3
 ; CHECK-NEXT:    vl8re64.v v0, (a0)
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a4, a0, 5
+; CHECK-NEXT:    slli a4, a0, 3
 ; CHECK-NEXT:    add a0, a4, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
@@ -1047,94 +958,67 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v7, v24
 ; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfeq.vv v26, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v26
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a3, a0, 4
+; CHECK-NEXT:    add a0, a3, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmerge.vvm v24, v16, v24, v0
 ; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v17, v24, v24, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a3, 24
-; CHECK-NEXT:    mul a0, a0, a3
+; CHECK-NEXT:    slli a3, a0, 4
+; CHECK-NEXT:    add a0, a3, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmv1r.v v0, v17
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
+; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmfeq.vv v0, v24, v24, v0.t
+; CHECK-NEXT:    vmerge.vvm v16, v24, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vfmax.vv v16, v16, v24, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfmax.vv v8, v8, v16, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a3, a0, 4
+; CHECK-NEXT:    add a0, a3, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    bltu a2, a1, .LBB28_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a2, a1
 ; CHECK-NEXT:  .LBB28_2:
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 5
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 24
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a1, a0, 5
+; CHECK-NEXT:    slli a1, a0, 3
 ; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmerge.vvm v16, v8, v24, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    vmfeq.vv v0, v24, v24, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfmax.vv v8, v8, v16, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 42
+; CHECK-NEXT:    li a1, 26
 ; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
@@ -1149,37 +1033,56 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64_unmasked(<vscale x 16 x double>
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    li a3, 24
+; CHECK-NEXT:    li a3, 26
 ; CHECK-NEXT:    mul a1, a1, a3
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    slli a3, a1, 4
+; CHECK-NEXT:    add a1, a3, a1
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a3, a1, 3
 ; CHECK-NEXT:    add a3, a0, a3
-; CHECK-NEXT:    vl8re64.v v24, (a3)
-; CHECK-NEXT:    vl8re64.v v8, (a0)
+; CHECK-NEXT:    vl8re64.v v8, (a3)
+; CHECK-NEXT:    vl8re64.v v24, (a0)
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    sub a0, a2, a1
 ; CHECK-NEXT:    sltu a3, a2, a0
 ; CHECK-NEXT:    addi a3, a3, -1
 ; CHECK-NEXT:    and a0, a3, a0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v7, v24, v24
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmerge.vvm v16, v24, v16, v0
-; CHECK-NEXT:    vfmax.vv v8, v16, v8
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a3, a0, 3
+; CHECK-NEXT:    add a0, a3, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a3, a0, 3
+; CHECK-NEXT:    add a0, a3, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a3, a0, 3
+; CHECK-NEXT:    add a0, a3, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    bltu a2, a1, .LBB29_2
 ; CHECK-NEXT:  # %bb.1:
@@ -1187,25 +1090,31 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64_unmasked(<vscale x 16 x double>
 ; CHECK-NEXT:  .LBB29_2:
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vv v7, v8, v8
-; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
-; CHECK-NEXT:    vfmax.vv v8, v8, v24
+; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmfeq.vv v16, v24, v24
 ; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vmerge.vvm v16, v8, v24, v0
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 24
+; CHECK-NEXT:    li a1, 26
 ; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
index 48baa12aa2e59b..e5a0fdeda120f6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
@@ -103,11 +103,11 @@ define <vscale x 4 x half> @vfmin_nxv4f16_vv(<vscale x 4 x half> %a, <vscale x 4
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v10
-; ZVFHMIN-NEXT:    vmerge.vvm v14, v12, v10, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
-; ZVFHMIN-NEXT:    vfmin.vv v10, v8, v14
+; ZVFHMIN-NEXT:    vmfeq.vv v2, v10, v10
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v10, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v2
+; ZVFHMIN-NEXT:    vmerge.vvm v10, v10, v12, v0
+; ZVFHMIN-NEXT:    vfmin.vv v10, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -122,11 +122,11 @@ define <vscale x 8 x half> @vfmin_nxv8f16_vv(<vscale x 8 x half> %a, <vscale x 8
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFH-NEXT:    vmfeq.vv v12, v10, v10
-; ZVFH-NEXT:    vmerge.vvm v14, v8, v10, v0
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmfeq.vv v2, v10, v10
+; ZVFH-NEXT:    vmerge.vvm v12, v8, v10, v0
+; ZVFH-NEXT:    vmv1r.v v0, v2
 ; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
-; ZVFH-NEXT:    vfmin.vv v8, v8, v14
+; ZVFH-NEXT:    vfmin.vv v8, v8, v12
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfmin_nxv8f16_vv:
@@ -136,11 +136,11 @@ define <vscale x 8 x half> @vfmin_nxv8f16_vv(<vscale x 8 x half> %a, <vscale x 8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v20, v16, v12, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
-; ZVFHMIN-NEXT:    vfmin.vv v12, v8, v20
+; ZVFHMIN-NEXT:    vmfeq.vv v4, v12, v12
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v12, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v4
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v12, v16, v0
+; ZVFHMIN-NEXT:    vfmin.vv v12, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -155,11 +155,11 @@ define <vscale x 16 x half> @vfmin_nxv16f16_vv(<vscale x 16 x half> %a, <vscale
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFH-NEXT:    vmfeq.vv v16, v12, v12
-; ZVFH-NEXT:    vmerge.vvm v20, v8, v12, v0
-; ZVFH-NEXT:    vmv1r.v v0, v16
+; ZVFH-NEXT:    vmfeq.vv v4, v12, v12
+; ZVFH-NEXT:    vmerge.vvm v16, v8, v12, v0
+; ZVFH-NEXT:    vmv1r.v v0, v4
 ; ZVFH-NEXT:    vmerge.vvm v8, v12, v8, v0
-; ZVFH-NEXT:    vfmin.vv v8, v8, v20
+; ZVFH-NEXT:    vfmin.vv v8, v8, v16
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfmin_nxv16f16_vv:
@@ -167,26 +167,36 @@ define <vscale x 16 x half> @vfmin_nxv16f16_vv(<vscale x 16 x half> %a, <vscale
 ; ZVFHMIN-NEXT:    addi sp, sp, -16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    li a1, 10
+; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    sub sp, sp, a0
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v24, v24
-; ZVFHMIN-NEXT:    vmfeq.vv v7, v16, v16
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v16, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v16, v0
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v24, v0
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vfmin.vv v16, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    li a1, 10
+; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
@@ -199,23 +209,35 @@ declare <vscale x 32 x half> @llvm.minimum.nxv32f16(<vscale x 32 x half>, <vscal
 define <vscale x 32 x half> @vfmin_nxv32f16_vv(<vscale x 32 x half> %a, <vscale x 32 x half> %b) nounwind {
 ; ZVFH-LABEL: vfmin_nxv32f16_vv:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    addi sp, sp, -16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 1
+; ZVFH-NEXT:    sub sp, sp, a0
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFH-NEXT:    vmfeq.vv v7, v16, v16
+; ZVFH-NEXT:    vmfeq.vv v24, v16, v16
+; ZVFH-NEXT:    addi a0, sp, 16
+; ZVFH-NEXT:    vs1r.v v24, (a0) # Unknown-size Folded Spill
 ; ZVFH-NEXT:    vmerge.vvm v24, v8, v16, v0
-; ZVFH-NEXT:    vmv1r.v v0, v7
+; ZVFH-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; ZVFH-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; ZVFH-NEXT:    vfmin.vv v8, v8, v24
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 1
+; ZVFH-NEXT:    add sp, sp, a0
+; ZVFH-NEXT:    addi sp, sp, 16
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfmin_nxv32f16_vv:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    addi sp, sp, -16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    li a1, 18
+; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    sub sp, sp, a0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a1, a0, 3
+; ZVFHMIN-NEXT:    add a0, a1, a0
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
@@ -225,45 +247,63 @@ define <vscale x 32 x half> @vfmin_nxv32f16_vv(<vscale x 32 x half> %a, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFHMIN-NEXT:    vmfeq.vv v3, v24, v24
+; ZVFHMIN-NEXT:    vmfeq.vv v16, v24, v24
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vs1r.v v16, (a0) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    vmerge.vvm v16, v8, v24, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v3
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v8, v0
-; ZVFHMIN-NEXT:    vfmin.vv v24, v8, v16
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v16, v8
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v4
+; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a1, a0, 3
+; ZVFHMIN-NEXT:    add a0, a1, a0
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
-; ZVFHMIN-NEXT:    vmfeq.vv v7, v8, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v24, v16, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v16, v24, v24
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vs1r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v8, v24, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a1, a0, 3
+; ZVFHMIN-NEXT:    add a0, a1, a0
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v8, v16, v0
+; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v8, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a1, a0, 3
+; ZVFHMIN-NEXT:    add a0, a1, a0
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfmin.vv v16, v16, v24
+; ZVFHMIN-NEXT:    vfmin.vv v16, v8, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    li a1, 18
+; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
@@ -312,11 +352,11 @@ define <vscale x 4 x float> @vfmin_nxv4f32_vv(<vscale x 4 x float> %a, <vscale x
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmerge.vvm v12, v8, v10, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT:    vfmin.vv v8, v8, v14
+; CHECK-NEXT:    vfmin.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.minimum.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
   ret <vscale x 4 x float> %v
@@ -329,11 +369,11 @@ define <vscale x 8 x float> @vfmin_nxv8f32_vv(<vscale x 8 x float> %a, <vscale x
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmerge.vvm v16, v8, v12, v0
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
-; CHECK-NEXT:    vfmin.vv v8, v8, v20
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.minimum.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b)
   ret <vscale x 8 x float> %v
@@ -344,13 +384,23 @@ declare <vscale x 16 x float> @llvm.minimum.nxv16f32(<vscale x 16 x float>, <vsc
 define <vscale x 16 x float> @vfmin_nxv16f32_vv(<vscale x 16 x float> %a, <vscale x 16 x float> %b) nounwind {
 ; CHECK-LABEL: vfmin_nxv16f32_vv:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    sub sp, sp, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v7, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vfmin.vv v8, v8, v24
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x float> @llvm.minimum.nxv16f32(<vscale x 16 x float> %a, <vscale x 16 x float> %b)
   ret <vscale x 16 x float> %v
@@ -380,11 +430,11 @@ define <vscale x 2 x double> @vfmin_nxv2f64_vv(<vscale x 2 x double> %a, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmerge.vvm v12, v8, v10, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT:    vfmin.vv v8, v8, v14
+; CHECK-NEXT:    vfmin.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.minimum.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
   ret <vscale x 2 x double> %v
@@ -397,11 +447,11 @@ define <vscale x 4 x double> @vfmin_nxv4f64_vv(<vscale x 4 x double> %a, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmerge.vvm v16, v8, v12, v0
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
-; CHECK-NEXT:    vfmin.vv v8, v8, v20
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.minimum.nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b)
   ret <vscale x 4 x double> %v
@@ -412,13 +462,23 @@ declare <vscale x 8 x double> @llvm.minimum.nxv8f64(<vscale x 8 x double>, <vsca
 define <vscale x 8 x double> @vfmin_nxv8f64_vv(<vscale x 8 x double> %a, <vscale x 8 x double> %b) nounwind {
 ; CHECK-LABEL: vfmin_nxv8f64_vv:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    sub sp, sp, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v7, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vfmin.vv v8, v8, v24
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.minimum.nxv8f64(<vscale x 8 x double> %a, <vscale x 8 x double> %b)
   ret <vscale x 8 x double> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll
index 69c76152910e86..fdf07b16e76f82 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll
@@ -174,18 +174,16 @@ define <vscale x 4 x half> @vfmin_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v12, v14, v0
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v14, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v14, v14, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v14, v12, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v14, v14, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v14, v12, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfmin.vv v10, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v10, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -214,11 +212,11 @@ define <vscale x 4 x half> @vfmin_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, <
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v14, v10, v12, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v10, v0
-; ZVFHMIN-NEXT:    vfmin.vv v10, v8, v14
+; ZVFHMIN-NEXT:    vmfeq.vv v2, v12, v12
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v2
+; ZVFHMIN-NEXT:    vmerge.vvm v10, v12, v10, v0
+; ZVFHMIN-NEXT:    vfmin.vv v10, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -233,12 +231,10 @@ define <vscale x 8 x half> @vfmin_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v12, v0
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v13
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v12
-; ZVFH-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v13
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vfmin.vv v8, v8, v14, v0.t
@@ -250,18 +246,16 @@ define <vscale x 8 x half> @vfmin_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v24, v16, v20, v0
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v20, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v20, v20, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v20, v16, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v20, v20, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v20, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vfmin.vv v12, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v12, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -274,11 +268,11 @@ define <vscale x 8 x half> @vfmin_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, <
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFH-NEXT:    vmfeq.vv v12, v10, v10
-; ZVFH-NEXT:    vmerge.vvm v14, v8, v10, v0
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmfeq.vv v2, v10, v10
+; ZVFH-NEXT:    vmerge.vvm v12, v8, v10, v0
+; ZVFH-NEXT:    vmv1r.v v0, v2
 ; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
-; ZVFH-NEXT:    vfmin.vv v8, v8, v14
+; ZVFH-NEXT:    vfmin.vv v8, v8, v12
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfmin_vv_nxv8f16_unmasked:
@@ -290,11 +284,11 @@ define <vscale x 8 x half> @vfmin_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, <
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16
-; ZVFHMIN-NEXT:    vmerge.vvm v20, v12, v16, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v12, v0
-; ZVFHMIN-NEXT:    vfmin.vv v12, v8, v20
+; ZVFHMIN-NEXT:    vmfeq.vv v4, v16, v16
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v4
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v16, v12, v0
+; ZVFHMIN-NEXT:    vfmin.vv v12, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -309,12 +303,10 @@ define <vscale x 16 x half> @vfmin_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vmv1r.v v16, v0
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT:    vmfeq.vv v17, v8, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v17
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v20, v8, v12, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v16
-; ZVFH-NEXT:    vmfeq.vv v17, v12, v12, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v17
+; ZVFH-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v16
 ; ZVFH-NEXT:    vfmin.vv v8, v8, v20, v0.t
@@ -322,37 +314,22 @@ define <vscale x 16 x half> @vfmin_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ;
 ; ZVFHMIN-LABEL: vfmin_vv_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
-; ZVFHMIN-NEXT:    sub sp, sp, a1
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFHMIN-NEXT:    vmv1r.v v7, v0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v24, v24, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v24, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v16, v0
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v24, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfmin.vv v16, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v16, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add sp, sp, a0
-; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.minimum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x half> %v
@@ -363,11 +340,11 @@ define <vscale x 16 x half> @vfmin_vv_nxv16f16_unmasked(<vscale x 16 x half> %va
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFH-NEXT:    vmfeq.vv v16, v12, v12
-; ZVFH-NEXT:    vmerge.vvm v20, v8, v12, v0
-; ZVFH-NEXT:    vmv1r.v v0, v16
+; ZVFH-NEXT:    vmfeq.vv v4, v12, v12
+; ZVFH-NEXT:    vmerge.vvm v16, v8, v12, v0
+; ZVFH-NEXT:    vmv1r.v v0, v4
 ; ZVFH-NEXT:    vmerge.vvm v8, v12, v8, v0
-; ZVFH-NEXT:    vfmin.vv v8, v8, v20
+; ZVFH-NEXT:    vfmin.vv v8, v8, v16
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfmin_vv_nxv16f16_unmasked:
@@ -375,9 +352,10 @@ define <vscale x 16 x half> @vfmin_vv_nxv16f16_unmasked(<vscale x 16 x half> %va
 ; ZVFHMIN-NEXT:    addi sp, sp, -16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    li a2, 10
+; ZVFHMIN-NEXT:    mul a1, a1, a2
 ; ZVFHMIN-NEXT:    sub sp, sp, a1
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
@@ -385,18 +363,27 @@ define <vscale x 16 x half> @vfmin_vv_nxv16f16_unmasked(<vscale x 16 x half> %va
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v7, v24, v24
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v24, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v8, v24, v24
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v24, v0
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v16, v0
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vfmin.vv v16, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    li a1, 10
+; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
@@ -417,14 +404,12 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFH-NEXT:    vmv1r.v v7, v0
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v25
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v24, v8, v16, v0
 ; ZVFH-NEXT:    addi a0, sp, 16
 ; ZVFH-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; ZVFH-NEXT:    vmv1r.v v0, v7
-; ZVFH-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v25
+; ZVFH-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v7
 ; ZVFH-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
@@ -440,17 +425,11 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    addi sp, sp, -16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    li a2, 34
+; ZVFHMIN-NEXT:    li a2, 24
 ; ZVFHMIN-NEXT:    mul a1, a1, a2
 ; ZVFHMIN-NEXT:    sub sp, sp, a1
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    li a2, 25
-; ZVFHMIN-NEXT:    mul a1, a1, a2
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vmv8r.v v16, v8
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; ZVFHMIN-NEXT:    vmv1r.v v7, v0
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
@@ -459,135 +438,68 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    and a3, a4, a3
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    csrr a4, vlenb
-; ZVFHMIN-NEXT:    li a5, 24
-; ZVFHMIN-NEXT:    mul a4, a4, a5
-; ZVFHMIN-NEXT:    add a4, sp, a4
-; ZVFHMIN-NEXT:    addi a4, a4, 16
-; ZVFHMIN-NEXT:    vs1r.v v0, (a4) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vslidedown.vx v0, v0, a2
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs1r.v v0, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vslidedown.vx v6, v0, a2
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 4
+; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v12, v24, v24, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v6
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v24, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    li a4, 25
-; ZVFHMIN-NEXT:    mul a2, a2, a4
+; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v0, (a2) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v4
+; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v16, v0
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl1r.v v8, (a2) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmfeq.vv v12, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfmin.vv v16, v16, v24, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v24, v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v6
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v24, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v6
+; ZVFHMIN-NEXT:    vfmin.vv v16, v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
+; ZVFHMIN-NEXT:    addi a2, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB10_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB10_2:
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 4
+; ZVFHMIN-NEXT:    slli a1, a1, 3
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    li a2, 24
-; ZVFHMIN-NEXT:    mul a1, a1, a2
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vl1r.v v0, (a1) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v24, v24, v0.t
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 4
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    addi a1, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    li a2, 25
-; ZVFHMIN-NEXT:    mul a1, a1, a2
+; ZVFHMIN-NEXT:    slli a1, a1, 4
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v0
+; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vmerge.vvm v24, v24, v16, v0
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 25
-; ZVFHMIN-NEXT:    mul a0, a0, a1
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 24
-; ZVFHMIN-NEXT:    mul a0, a0, a1
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 25
-; ZVFHMIN-NEXT:    mul a0, a0, a1
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vmerge.vvm v24, v8, v16, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v7
 ; ZVFHMIN-NEXT:    vfmin.vv v16, v16, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 34
+; ZVFHMIN-NEXT:    li a1, 24
 ; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
@@ -599,13 +511,25 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 define <vscale x 32 x half> @vfmin_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, i32 zeroext %evl) {
 ; ZVFH-LABEL: vfmin_vv_nxv32f16_unmasked:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    addi sp, sp, -16
+; ZVFH-NEXT:    .cfi_def_cfa_offset 16
+; ZVFH-NEXT:    csrr a1, vlenb
+; ZVFH-NEXT:    slli a1, a1, 1
+; ZVFH-NEXT:    sub sp, sp, a1
+; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFH-NEXT:    vmfeq.vv v7, v16, v16
+; ZVFH-NEXT:    vmfeq.vv v24, v16, v16
+; ZVFH-NEXT:    addi a0, sp, 16
+; ZVFH-NEXT:    vs1r.v v24, (a0) # Unknown-size Folded Spill
 ; ZVFH-NEXT:    vmerge.vvm v24, v8, v16, v0
-; ZVFH-NEXT:    vmv1r.v v0, v7
+; ZVFH-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; ZVFH-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; ZVFH-NEXT:    vfmin.vv v8, v8, v24
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 1
+; ZVFH-NEXT:    add sp, sp, a0
+; ZVFH-NEXT:    addi sp, sp, 16
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfmin_vv_nxv32f16_unmasked:
@@ -613,9 +537,9 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    addi sp, sp, -16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 5
+; ZVFHMIN-NEXT:    slli a1, a1, 4
 ; ZVFHMIN-NEXT:    sub sp, sp, a1
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
@@ -628,80 +552,64 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vslidedown.vx v7, v24, a2
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 4
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
+; ZVFHMIN-NEXT:    addi a2, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vmfeq.vv v12, v24, v24, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v24, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv4r.v v8, v16
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    li a4, 24
-; ZVFHMIN-NEXT:    mul a2, a2, a4
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20
-; ZVFHMIN-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v24, v0
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
+; ZVFHMIN-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v24, v8, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vmfeq.vv v12, v24, v24, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v24, v16, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v24, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfmin.vv v16, v16, v8, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v16, v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v4, v16
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB11_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB11_2:
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 4
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
+; ZVFHMIN-NEXT:    addi a1, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    li a2, 24
-; ZVFHMIN-NEXT:    mul a1, a1, a2
+; ZVFHMIN-NEXT:    slli a1, a1, 3
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v3, v16, v16
+; ZVFHMIN-NEXT:    vmfeq.vv v24, v16, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vs1r.v v24, (a0) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    vmerge.vvm v24, v8, v16, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v3
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v8, v0
 ; ZVFHMIN-NEXT:    vfmin.vv v16, v16, v24
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v0, v16
 ; ZVFHMIN-NEXT:    vmv8r.v v8, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 5
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
@@ -784,12 +692,10 @@ define <vscale x 4 x float> @vfmin_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vfmin.vv v8, v8, v14, v0.t
@@ -803,11 +709,11 @@ define <vscale x 4 x float> @vfmin_vv_nxv4f32_unmasked(<vscale x 4 x float> %va,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmerge.vvm v12, v8, v10, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT:    vfmin.vv v8, v8, v14
+; CHECK-NEXT:    vfmin.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.minimum.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x float> %v
@@ -820,12 +726,10 @@ define <vscale x 8 x float> @vfmin_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vfmin.vv v8, v8, v20, v0.t
@@ -839,11 +743,11 @@ define <vscale x 8 x float> @vfmin_vv_nxv8f32_unmasked(<vscale x 8 x float> %va,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmerge.vvm v16, v8, v12, v0
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
-; CHECK-NEXT:    vfmin.vv v8, v8, v20
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.minimum.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x float> %v
@@ -890,12 +794,10 @@ define <vscale x 2 x double> @vfmin_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vfmin.vv v8, v8, v14, v0.t
@@ -909,11 +811,11 @@ define <vscale x 2 x double> @vfmin_vv_nxv2f64_unmasked(<vscale x 2 x double> %v
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmerge.vvm v12, v8, v10, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT:    vfmin.vv v8, v8, v14
+; CHECK-NEXT:    vfmin.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.minimum.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x double> %v
@@ -926,12 +828,10 @@ define <vscale x 4 x double> @vfmin_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vfmin.vv v8, v8, v20, v0.t
@@ -945,11 +845,11 @@ define <vscale x 4 x double> @vfmin_vv_nxv4f64_unmasked(<vscale x 4 x double> %v
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmerge.vvm v16, v8, v12, v0
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
-; CHECK-NEXT:    vfmin.vv v8, v8, v20
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.minimum.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x double> %v
@@ -968,14 +868,12 @@ define <vscale x 8 x double> @vfmin_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    vmv1r.v v7, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
@@ -992,13 +890,25 @@ define <vscale x 8 x double> @vfmin_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 define <vscale x 8 x double> @vfmin_vv_nxv8f64_unmasked(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, i32 zeroext %evl) {
 ; CHECK-LABEL: vfmin_vv_nxv8f64_unmasked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v7, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vfmin.vv v8, v8, v24
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.minimum.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x double> %v
@@ -1012,23 +922,24 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    li a3, 42
+; CHECK-NEXT:    li a3, 26
 ; CHECK-NEXT:    mul a1, a1, a3
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a3, a1, 3
 ; CHECK-NEXT:    add a3, a0, a3
 ; CHECK-NEXT:    vl8re64.v v24, (a3)
 ; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    slli a3, a3, 4
+; CHECK-NEXT:    slli a4, a3, 4
+; CHECK-NEXT:    add a3, a4, a3
 ; CHECK-NEXT:    add a3, sp, a3
 ; CHECK-NEXT:    addi a3, a3, 16
 ; CHECK-NEXT:    vs8r.v v24, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    srli a3, a1, 3
 ; CHECK-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    csrr a4, vlenb
-; CHECK-NEXT:    slli a4, a4, 5
+; CHECK-NEXT:    slli a4, a4, 3
 ; CHECK-NEXT:    add a4, sp, a4
 ; CHECK-NEXT:    addi a4, a4, 16
 ; CHECK-NEXT:    vs1r.v v0, (a4) # Unknown-size Folded Spill
@@ -1039,7 +950,7 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:    and a3, a4, a3
 ; CHECK-NEXT:    vl8re64.v v0, (a0)
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a4, a0, 5
+; CHECK-NEXT:    slli a4, a0, 3
 ; CHECK-NEXT:    add a0, a4, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
@@ -1047,94 +958,67 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v7, v24
 ; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfeq.vv v26, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v26
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a3, a0, 4
+; CHECK-NEXT:    add a0, a3, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmerge.vvm v24, v16, v24, v0
 ; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v17, v24, v24, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a3, 24
-; CHECK-NEXT:    mul a0, a0, a3
+; CHECK-NEXT:    slli a3, a0, 4
+; CHECK-NEXT:    add a0, a3, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmv1r.v v0, v17
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
+; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmfeq.vv v0, v24, v24, v0.t
+; CHECK-NEXT:    vmerge.vvm v16, v24, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vfmin.vv v16, v16, v24, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfmin.vv v8, v8, v16, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a3, a0, 4
+; CHECK-NEXT:    add a0, a3, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    bltu a2, a1, .LBB28_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a2, a1
 ; CHECK-NEXT:  .LBB28_2:
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 5
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 24
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a1, a0, 5
+; CHECK-NEXT:    slli a1, a0, 3
 ; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmerge.vvm v16, v8, v24, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    vmfeq.vv v0, v24, v24, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfmin.vv v8, v8, v16, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 42
+; CHECK-NEXT:    li a1, 26
 ; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
@@ -1149,37 +1033,56 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64_unmasked(<vscale x 16 x double>
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    li a3, 24
+; CHECK-NEXT:    li a3, 26
 ; CHECK-NEXT:    mul a1, a1, a3
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    slli a3, a1, 4
+; CHECK-NEXT:    add a1, a3, a1
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a3, a1, 3
 ; CHECK-NEXT:    add a3, a0, a3
-; CHECK-NEXT:    vl8re64.v v24, (a3)
-; CHECK-NEXT:    vl8re64.v v8, (a0)
+; CHECK-NEXT:    vl8re64.v v8, (a3)
+; CHECK-NEXT:    vl8re64.v v24, (a0)
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    sub a0, a2, a1
 ; CHECK-NEXT:    sltu a3, a2, a0
 ; CHECK-NEXT:    addi a3, a3, -1
 ; CHECK-NEXT:    and a0, a3, a0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmfeq.vv v7, v24, v24
-; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmerge.vvm v16, v24, v16, v0
-; CHECK-NEXT:    vfmin.vv v8, v16, v8
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a3, a0, 3
+; CHECK-NEXT:    add a0, a3, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a3, a0, 3
+; CHECK-NEXT:    add a0, a3, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a3, a0, 3
+; CHECK-NEXT:    add a0, a3, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    bltu a2, a1, .LBB29_2
 ; CHECK-NEXT:  # %bb.1:
@@ -1187,25 +1090,31 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64_unmasked(<vscale x 16 x double>
 ; CHECK-NEXT:  .LBB29_2:
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vv v7, v8, v8
-; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
-; CHECK-NEXT:    vfmin.vv v8, v8, v24
+; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmfeq.vv v16, v24, v24
 ; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vmerge.vvm v16, v8, v24, v0
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 24
+; CHECK-NEXT:    li a1, 26
 ; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
index 0e09f59b6a20fe..1e387006cd3e2c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
@@ -1757,19 +1757,33 @@ define void @mscatter_baseidx_nxv16i8_nxv16f64(<vscale x 8 x double> %val0, <vsc
 ;
 ; RV64-LABEL: mscatter_baseidx_nxv16i8_nxv16f64:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    sub sp, sp, a2
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; RV64-NEXT:    vl2r.v v6, (a1)
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsext.vf8 v24, v7
+; RV64-NEXT:    vsll.vi v16, v24, 3
 ; RV64-NEXT:    vsext.vf8 v24, v6
 ; RV64-NEXT:    vsll.vi v24, v24, 3
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
-; RV64-NEXT:    vsext.vf8 v8, v7
-; RV64-NEXT:    vsll.vi v8, v8, 3
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    srli a1, a1, 3
 ; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
 ; RV64-NEXT:    vslidedown.vx v0, v0, a1
 ; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i8> %idxs
   %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
@@ -1797,19 +1811,33 @@ define void @mscatter_baseidx_nxv16i16_nxv16f64(<vscale x 8 x double> %val0, <vs
 ;
 ; RV64-LABEL: mscatter_baseidx_nxv16i16_nxv16f64:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    sub sp, sp, a2
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; RV64-NEXT:    vl4re16.v v4, (a1)
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; RV64-NEXT:    vsext.vf4 v24, v6
+; RV64-NEXT:    vsll.vi v16, v24, 3
 ; RV64-NEXT:    vsext.vf4 v24, v4
 ; RV64-NEXT:    vsll.vi v24, v24, 3
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
-; RV64-NEXT:    vsext.vf4 v8, v6
-; RV64-NEXT:    vsll.vi v8, v8, 3
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    srli a1, a1, 3
 ; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
 ; RV64-NEXT:    vslidedown.vx v0, v0, a1
 ; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i16> %idxs
   %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll
index 8bc2334282653f..21213e5c3c9651 100644
--- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll
@@ -194,23 +194,21 @@ define <vscale x 4 x half> @vp_nearbyint_nxv4f16(<vscale x 4 x half> %va, <vscal
 ;
 ; ZVFHMIN-LABEL: vp_nearbyint_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v9, v0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v9, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    frflags a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    fsflags a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -261,42 +259,38 @@ declare <vscale x 8 x half> @llvm.vp.nearbyint.nxv8f16(<vscale x 8 x half>, <vsc
 define <vscale x 8 x half> @vp_nearbyint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_nearbyint_nxv8f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vmv1r.v v10, v0
 ; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
 ; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; ZVFH-NEXT:    frflags a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v0, v10
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    fsflags a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_nearbyint_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v10, v0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v10, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    frflags a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    fsflags a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -347,42 +341,38 @@ declare <vscale x 16 x half> @llvm.vp.nearbyint.nxv16f16(<vscale x 16 x half>, <
 define <vscale x 16 x half> @vp_nearbyint_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_nearbyint_nxv16f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vmv1r.v v12, v0
 ; ZVFH-NEXT:    lui a1, %hi(.LCPI8_0)
 ; ZVFH-NEXT:    flh fa5, %lo(.LCPI8_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; ZVFH-NEXT:    frflags a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vmv1r.v v0, v12
-; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; ZVFH-NEXT:    fsflags a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_nearbyint_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v12, v24, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    frflags a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    fsflags a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
@@ -433,21 +423,19 @@ declare <vscale x 32 x half> @llvm.vp.nearbyint.nxv32f16(<vscale x 32 x half>, <
 define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_nearbyint_nxv32f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vmv1r.v v16, v0
 ; ZVFH-NEXT:    lui a1, %hi(.LCPI10_0)
 ; ZVFH-NEXT:    flh fa5, %lo(.LCPI10_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; ZVFH-NEXT:    frflags a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
-; ZVFH-NEXT:    vmv1r.v v0, v16
-; ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; ZVFH-NEXT:    fsflags a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_nearbyint_nxv32f16:
@@ -455,10 +443,10 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vs
 ; ZVFHMIN-NEXT:    addi sp, sp, -16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    slli a1, a1, 4
 ; ZVFHMIN-NEXT:    sub sp, sp, a1
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; ZVFHMIN-NEXT:    vmv1r.v v7, v0
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVFHMIN-NEXT:    vmv1r.v v16, v0
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
@@ -467,21 +455,22 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vs
 ; ZVFHMIN-NEXT:    and a3, a4, a3
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v17, v0, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v0, v0, a2
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    addi a2, sp, 16
+; ZVFHMIN-NEXT:    csrr a2, vlenb
+; ZVFHMIN-NEXT:    slli a2, a2, 3
+; ZVFHMIN-NEXT:    add a2, sp, a2
+; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vfabs.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    lui a2, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a2
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    frflags a2
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    fsflags a2
@@ -489,30 +478,39 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vs
 ; ZVFHMIN-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
+; ZVFHMIN-NEXT:    addi a2, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB10_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB10_2:
-; ZVFHMIN-NEXT:    addi a1, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT:    csrr a1, vlenb
+; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    add a1, sp, a1
+; ZVFHMIN-NEXT:    addi a1, a1, 16
+; ZVFHMIN-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vfabs.v v16, v24, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; ZVFHMIN-NEXT:    vfabs.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v7, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v16, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    frflags a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; ZVFHMIN-NEXT:    vmv1r.v v8, v16
 ; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v24, v0.t
 ; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; ZVFHMIN-NEXT:    fsflags a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT:    vmv1r.v v0, v8
 ; ZVFHMIN-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
@@ -538,12 +536,6 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16_unmasked(<vscale x 32 x half>
 ;
 ; ZVFHMIN-LABEL: vp_nearbyint_nxv32f16_unmasked:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
-; ZVFHMIN-NEXT:    sub sp, sp, a1
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
@@ -554,35 +546,29 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16_unmasked(<vscale x 32 x half>
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmset.m v16
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v16, v16, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v0, v16, a2
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v16
-; ZVFHMIN-NEXT:    vfabs.v v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v24, v16, v0.t
 ; ZVFHMIN-NEXT:    lui a2, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a2
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v16, v8, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5, v0.t
 ; ZVFHMIN-NEXT:    frflags a2
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v16
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v24, v0.t
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
 ; ZVFHMIN-NEXT:    fsflags a2
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB11_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB11_2:
-; ZVFHMIN-NEXT:    addi a1, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfabs.v v24, v16
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5
@@ -594,10 +580,6 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16_unmasked(<vscale x 32 x half>
 ; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add sp, sp, a0
-; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 32 x half> @llvm.vp.nearbyint.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 32 x half> %v
@@ -692,21 +674,19 @@ declare <vscale x 4 x float> @llvm.vp.nearbyint.nxv4f32(<vscale x 4 x float>, <v
 define <vscale x 4 x float> @vp_nearbyint_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_nxv4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.nearbyint.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x float> %v
@@ -736,21 +716,19 @@ declare <vscale x 8 x float> @llvm.vp.nearbyint.nxv8f32(<vscale x 8 x float>, <v
 define <vscale x 8 x float> @vp_nearbyint_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_nxv8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.nearbyint.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x float> %v
@@ -780,21 +758,19 @@ declare <vscale x 16 x float> @llvm.vp.nearbyint.nxv16f32(<vscale x 16 x float>,
 define <vscale x 16 x float> @vp_nearbyint_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_nxv16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x float> @llvm.vp.nearbyint.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x float> %v
@@ -866,21 +842,19 @@ declare <vscale x 2 x double> @llvm.vp.nearbyint.nxv2f64(<vscale x 2 x double>,
 define <vscale x 2 x double> @vp_nearbyint_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_nxv2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.nearbyint.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -910,21 +884,19 @@ declare <vscale x 4 x double> @llvm.vp.nearbyint.nxv4f64(<vscale x 4 x double>,
 define <vscale x 4 x double> @vp_nearbyint_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_nxv4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.nearbyint.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
@@ -954,21 +926,19 @@ declare <vscale x 7 x double> @llvm.vp.nearbyint.nxv7f64(<vscale x 7 x double>,
 define <vscale x 7 x double> @vp_nearbyint_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_nxv7f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI28_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI28_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.nearbyint.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
@@ -998,21 +968,19 @@ declare <vscale x 8 x double> @llvm.vp.nearbyint.nxv8f64(<vscale x 8 x double>,
 define <vscale x 8 x double> @vp_nearbyint_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_nxv8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI30_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI30_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.nearbyint.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
@@ -1046,16 +1014,16 @@ define <vscale x 16 x double> @vp_nearbyint_nxv16f64(<vscale x 16 x double> %va,
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    slli a1, a1, 1
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT:    vmv1r.v v24, v0
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    vmv8r.v v24, v16
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    srli a2, a1, 3
 ; CHECK-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vslidedown.vx v25, v0, a2
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT:    vslidedown.vx v0, v0, a2
 ; CHECK-NEXT:    sub a2, a0, a1
 ; CHECK-NEXT:    sltu a3, a0, a2
 ; CHECK-NEXT:    addi a3, a3, -1
@@ -1063,60 +1031,36 @@ define <vscale x 16 x double> @vp_nearbyint_nxv16f64(<vscale x 16 x double> %va,
 ; CHECK-NEXT:    lui a3, %hi(.LCPI32_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI32_0)(a3)
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vfabs.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    frflags a2
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v8, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    fsflags a2
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
 ; CHECK-NEXT:    bltu a0, a1, .LBB32_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:  .LBB32_2:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v24, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vmv8r.v v16, v24
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 1
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
index f934127f978dc9..a8788c63556714 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
@@ -176,21 +176,19 @@ define <vscale x 4 x half> @vp_rint_nxv4f16(<vscale x 4 x half> %va, <vscale x 4
 ;
 ; ZVFHMIN-LABEL: vp_rint_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v9, v0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v9, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -237,38 +235,34 @@ declare <vscale x 8 x half> @llvm.vp.rint.nxv8f16(<vscale x 8 x half>, <vscale x
 define <vscale x 8 x half> @vp_rint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_rint_nxv8f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vmv1r.v v10, v0
 ; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
 ; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v0, v10
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_rint_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v10, v0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v10, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -315,38 +309,34 @@ declare <vscale x 16 x half> @llvm.vp.rint.nxv16f16(<vscale x 16 x half>, <vscal
 define <vscale x 16 x half> @vp_rint_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_rint_nxv16f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vmv1r.v v12, v0
 ; ZVFH-NEXT:    lui a1, %hi(.LCPI8_0)
 ; ZVFH-NEXT:    flh fa5, %lo(.LCPI8_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vmv1r.v v0, v12
-; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_rint_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v12, v24, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
@@ -393,19 +383,17 @@ declare <vscale x 32 x half> @llvm.vp.rint.nxv32f16(<vscale x 32 x half>, <vscal
 define <vscale x 32 x half> @vp_rint_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_rint_nxv32f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vmv1r.v v16, v0
 ; ZVFH-NEXT:    lui a1, %hi(.LCPI10_0)
 ; ZVFH-NEXT:    flh fa5, %lo(.LCPI10_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
-; ZVFH-NEXT:    vmv1r.v v0, v16
-; ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_rint_nxv32f16:
@@ -425,20 +413,18 @@ define <vscale x 32 x half> @vp_rint_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    and a3, a4, a3
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v17, v0, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v0, v0, a2
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    addi a2, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vfabs.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    lui a2, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a2
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
@@ -491,12 +477,6 @@ define <vscale x 32 x half> @vp_rint_nxv32f16_unmasked(<vscale x 32 x half> %va,
 ;
 ; ZVFHMIN-LABEL: vp_rint_nxv32f16_unmasked:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
-; ZVFHMIN-NEXT:    sub sp, sp, a1
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
@@ -507,33 +487,27 @@ define <vscale x 32 x half> @vp_rint_nxv32f16_unmasked(<vscale x 32 x half> %va,
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmset.m v16
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v16, v16, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v0, v16, a2
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v16
-; ZVFHMIN-NEXT:    vfabs.v v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v24, v16, v0.t
 ; ZVFHMIN-NEXT:    lui a2, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a2
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v16, v8, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v16
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v24, v0.t
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB11_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB11_2:
-; ZVFHMIN-NEXT:    addi a1, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfabs.v v24, v16
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5
@@ -543,10 +517,6 @@ define <vscale x 32 x half> @vp_rint_nxv32f16_unmasked(<vscale x 32 x half> %va,
 ; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add sp, sp, a0
-; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 32 x half> @llvm.vp.rint.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 32 x half> %v
@@ -633,19 +603,17 @@ declare <vscale x 4 x float> @llvm.vp.rint.nxv4f32(<vscale x 4 x float>, <vscale
 define <vscale x 4 x float> @vp_rint_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_nxv4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.rint.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x float> %v
@@ -673,19 +641,17 @@ declare <vscale x 8 x float> @llvm.vp.rint.nxv8f32(<vscale x 8 x float>, <vscale
 define <vscale x 8 x float> @vp_rint_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_nxv8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.rint.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x float> %v
@@ -713,19 +679,17 @@ declare <vscale x 16 x float> @llvm.vp.rint.nxv16f32(<vscale x 16 x float>, <vsc
 define <vscale x 16 x float> @vp_rint_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_nxv16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x float> @llvm.vp.rint.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x float> %v
@@ -791,19 +755,17 @@ declare <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double>, <vsca
 define <vscale x 2 x double> @vp_rint_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_nxv2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -831,19 +793,17 @@ declare <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double>, <vsca
 define <vscale x 4 x double> @vp_rint_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_nxv4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
@@ -871,19 +831,17 @@ declare <vscale x 7 x double> @llvm.vp.rint.nxv7f64(<vscale x 7 x double>, <vsca
 define <vscale x 7 x double> @vp_rint_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_nxv7f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI28_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI28_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.rint.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
@@ -911,19 +869,17 @@ declare <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double>, <vsca
 define <vscale x 8 x double> @vp_rint_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_nxv8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI30_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI30_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
@@ -964,7 +920,7 @@ define <vscale x 16 x double> @vp_rint_nxv16f64(<vscale x 16 x double> %va, <vsc
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    srli a2, a1, 3
 ; CHECK-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vslidedown.vx v25, v0, a2
+; CHECK-NEXT:    vslidedown.vx v0, v0, a2
 ; CHECK-NEXT:    sub a2, a0, a1
 ; CHECK-NEXT:    sltu a3, a0, a2
 ; CHECK-NEXT:    addi a3, a3, -1
@@ -972,12 +928,10 @@ define <vscale x 16 x double> @vp_rint_nxv16f64(<vscale x 16 x double> %va, <vsc
 ; CHECK-NEXT:    lui a3, %hi(.LCPI32_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI32_0)(a3)
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfabs.v v8, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v8, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
index eb4994914fad9a..8bd2f93683cce5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
@@ -194,23 +194,21 @@ define <vscale x 4 x half> @vp_round_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ;
 ; ZVFHMIN-LABEL: vp_round_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v9, v0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v9, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a0, 4
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -261,42 +259,38 @@ declare <vscale x 8 x half> @llvm.vp.round.nxv8f16(<vscale x 8 x half>, <vscale
 define <vscale x 8 x half> @vp_round_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_nxv8f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vmv1r.v v10, v0
 ; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
 ; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    fsrmi a0, 4
-; ZVFH-NEXT:    vmv1r.v v0, v10
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_round_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v10, v0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v10, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a0, 4
-; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -347,42 +341,38 @@ declare <vscale x 16 x half> @llvm.vp.round.nxv16f16(<vscale x 16 x half>, <vsca
 define <vscale x 16 x half> @vp_round_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_nxv16f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vmv1r.v v12, v0
 ; ZVFH-NEXT:    lui a1, %hi(.LCPI8_0)
 ; ZVFH-NEXT:    flh fa5, %lo(.LCPI8_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFH-NEXT:    fsrmi a0, 4
-; ZVFH-NEXT:    vmv1r.v v0, v12
-; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_round_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v12, v24, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a0, 4
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
@@ -433,21 +423,19 @@ declare <vscale x 32 x half> @llvm.vp.round.nxv32f16(<vscale x 32 x half>, <vsca
 define <vscale x 32 x half> @vp_round_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_nxv32f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vmv1r.v v16, v0
 ; ZVFH-NEXT:    lui a1, %hi(.LCPI10_0)
 ; ZVFH-NEXT:    flh fa5, %lo(.LCPI10_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
 ; ZVFH-NEXT:    fsrmi a0, 4
-; ZVFH-NEXT:    vmv1r.v v0, v16
-; ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_round_nxv32f16:
@@ -467,21 +455,19 @@ define <vscale x 32 x half> @vp_round_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    and a3, a4, a3
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v17, v0, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v0, v0, a2
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    addi a2, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vfabs.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    lui a2, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a2
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a2, 4
-; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    fsrm a2
 ; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
@@ -539,12 +525,6 @@ define <vscale x 32 x half> @vp_round_nxv32f16_unmasked(<vscale x 32 x half> %va
 ;
 ; ZVFHMIN-LABEL: vp_round_nxv32f16_unmasked:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
-; ZVFHMIN-NEXT:    sub sp, sp, a1
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
@@ -555,35 +535,29 @@ define <vscale x 32 x half> @vp_round_nxv32f16_unmasked(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmset.m v16
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v16, v16, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v0, v16, a2
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v16
-; ZVFHMIN-NEXT:    vfabs.v v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v24, v16, v0.t
 ; ZVFHMIN-NEXT:    lui a2, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a2
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v16, v8, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a2, 4
-; ZVFHMIN-NEXT:    vmv1r.v v0, v16
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
 ; ZVFHMIN-NEXT:    fsrm a2
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB11_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB11_2:
-; ZVFHMIN-NEXT:    addi a1, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfabs.v v24, v16
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5
@@ -595,10 +569,6 @@ define <vscale x 32 x half> @vp_round_nxv32f16_unmasked(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add sp, sp, a0
-; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 32 x half> @llvm.vp.round.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 32 x half> %v
@@ -693,21 +663,19 @@ declare <vscale x 4 x float> @llvm.vp.round.nxv4f32(<vscale x 4 x float>, <vscal
 define <vscale x 4 x float> @vp_round_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_nxv4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.round.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x float> %v
@@ -737,21 +705,19 @@ declare <vscale x 8 x float> @llvm.vp.round.nxv8f32(<vscale x 8 x float>, <vscal
 define <vscale x 8 x float> @vp_round_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_nxv8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.round.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x float> %v
@@ -781,21 +747,19 @@ declare <vscale x 16 x float> @llvm.vp.round.nxv16f32(<vscale x 16 x float>, <vs
 define <vscale x 16 x float> @vp_round_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_nxv16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x float> @llvm.vp.round.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x float> %v
@@ -867,21 +831,19 @@ declare <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double>, <vsc
 define <vscale x 2 x double> @vp_round_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_nxv2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -911,21 +873,19 @@ declare <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double>, <vsc
 define <vscale x 4 x double> @vp_round_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_nxv4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
@@ -955,21 +915,19 @@ declare <vscale x 7 x double> @llvm.vp.round.nxv7f64(<vscale x 7 x double>, <vsc
 define <vscale x 7 x double> @vp_round_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_nxv7f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI28_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI28_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.round.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
@@ -999,21 +957,19 @@ declare <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double>, <vsc
 define <vscale x 8 x double> @vp_round_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_nxv8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI30_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI30_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
@@ -1056,7 +1012,7 @@ define <vscale x 16 x double> @vp_round_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    srli a2, a1, 3
 ; CHECK-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vslidedown.vx v25, v0, a2
+; CHECK-NEXT:    vslidedown.vx v0, v0, a2
 ; CHECK-NEXT:    sub a2, a0, a1
 ; CHECK-NEXT:    sltu a3, a0, a2
 ; CHECK-NEXT:    addi a3, a3, -1
@@ -1064,13 +1020,11 @@ define <vscale x 16 x double> @vp_round_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:    lui a3, %hi(.LCPI32_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI32_0)(a3)
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfabs.v v8, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v8, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a2, 4
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
 ; CHECK-NEXT:    fsrm a2
 ; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
index f366a2922d079e..2d6f145070b45c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
@@ -194,23 +194,21 @@ define <vscale x 4 x half> @vp_roundeven_nxv4f16(<vscale x 4 x half> %va, <vscal
 ;
 ; ZVFHMIN-LABEL: vp_roundeven_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v9, v0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v9, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a0, 0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -261,42 +259,38 @@ declare <vscale x 8 x half> @llvm.vp.roundeven.nxv8f16(<vscale x 8 x half>, <vsc
 define <vscale x 8 x half> @vp_roundeven_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_nxv8f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vmv1r.v v10, v0
 ; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
 ; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    fsrmi a0, 0
-; ZVFH-NEXT:    vmv1r.v v0, v10
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundeven_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v10, v0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v10, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a0, 0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -347,42 +341,38 @@ declare <vscale x 16 x half> @llvm.vp.roundeven.nxv16f16(<vscale x 16 x half>, <
 define <vscale x 16 x half> @vp_roundeven_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_nxv16f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vmv1r.v v12, v0
 ; ZVFH-NEXT:    lui a1, %hi(.LCPI8_0)
 ; ZVFH-NEXT:    flh fa5, %lo(.LCPI8_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFH-NEXT:    fsrmi a0, 0
-; ZVFH-NEXT:    vmv1r.v v0, v12
-; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundeven_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v12, v24, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a0, 0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
@@ -433,21 +423,19 @@ declare <vscale x 32 x half> @llvm.vp.roundeven.nxv32f16(<vscale x 32 x half>, <
 define <vscale x 32 x half> @vp_roundeven_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_nxv32f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vmv1r.v v16, v0
 ; ZVFH-NEXT:    lui a1, %hi(.LCPI10_0)
 ; ZVFH-NEXT:    flh fa5, %lo(.LCPI10_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
 ; ZVFH-NEXT:    fsrmi a0, 0
-; ZVFH-NEXT:    vmv1r.v v0, v16
-; ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundeven_nxv32f16:
@@ -467,21 +455,19 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16(<vscale x 32 x half> %va, <vs
 ; ZVFHMIN-NEXT:    and a3, a4, a3
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v17, v0, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v0, v0, a2
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    addi a2, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vfabs.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    lui a2, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a2
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a2, 0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    fsrm a2
 ; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
@@ -539,12 +525,6 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16_unmasked(<vscale x 32 x half>
 ;
 ; ZVFHMIN-LABEL: vp_roundeven_nxv32f16_unmasked:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
-; ZVFHMIN-NEXT:    sub sp, sp, a1
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
@@ -555,35 +535,29 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16_unmasked(<vscale x 32 x half>
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmset.m v16
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v16, v16, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v0, v16, a2
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v16
-; ZVFHMIN-NEXT:    vfabs.v v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v24, v16, v0.t
 ; ZVFHMIN-NEXT:    lui a2, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a2
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v16, v8, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a2, 0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v16
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
 ; ZVFHMIN-NEXT:    fsrm a2
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB11_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB11_2:
-; ZVFHMIN-NEXT:    addi a1, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfabs.v v24, v16
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5
@@ -595,10 +569,6 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16_unmasked(<vscale x 32 x half>
 ; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add sp, sp, a0
-; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 32 x half> @llvm.vp.roundeven.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 32 x half> %v
@@ -693,21 +663,19 @@ declare <vscale x 4 x float> @llvm.vp.roundeven.nxv4f32(<vscale x 4 x float>, <v
 define <vscale x 4 x float> @vp_roundeven_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_nxv4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.roundeven.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x float> %v
@@ -737,21 +705,19 @@ declare <vscale x 8 x float> @llvm.vp.roundeven.nxv8f32(<vscale x 8 x float>, <v
 define <vscale x 8 x float> @vp_roundeven_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_nxv8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.roundeven.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x float> %v
@@ -781,21 +747,19 @@ declare <vscale x 16 x float> @llvm.vp.roundeven.nxv16f32(<vscale x 16 x float>,
 define <vscale x 16 x float> @vp_roundeven_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_nxv16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x float> @llvm.vp.roundeven.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x float> %v
@@ -867,21 +831,19 @@ declare <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double>,
 define <vscale x 2 x double> @vp_roundeven_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_nxv2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -911,21 +873,19 @@ declare <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double>,
 define <vscale x 4 x double> @vp_roundeven_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_nxv4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
@@ -955,21 +915,19 @@ declare <vscale x 7 x double> @llvm.vp.roundeven.nxv7f64(<vscale x 7 x double>,
 define <vscale x 7 x double> @vp_roundeven_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_nxv7f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI28_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI28_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.roundeven.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
@@ -999,21 +957,19 @@ declare <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double>,
 define <vscale x 8 x double> @vp_roundeven_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_nxv8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI30_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI30_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
@@ -1056,7 +1012,7 @@ define <vscale x 16 x double> @vp_roundeven_nxv16f64(<vscale x 16 x double> %va,
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    srli a2, a1, 3
 ; CHECK-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vslidedown.vx v25, v0, a2
+; CHECK-NEXT:    vslidedown.vx v0, v0, a2
 ; CHECK-NEXT:    sub a2, a0, a1
 ; CHECK-NEXT:    sltu a3, a0, a2
 ; CHECK-NEXT:    addi a3, a3, -1
@@ -1064,13 +1020,11 @@ define <vscale x 16 x double> @vp_roundeven_nxv16f64(<vscale x 16 x double> %va,
 ; CHECK-NEXT:    lui a3, %hi(.LCPI32_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI32_0)(a3)
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfabs.v v8, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v8, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a2, 0
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
 ; CHECK-NEXT:    fsrm a2
 ; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
index 79c940bdf089a5..3992ba8d121292 100644
--- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
@@ -194,23 +194,21 @@ define <vscale x 4 x half> @vp_roundtozero_nxv4f16(<vscale x 4 x half> %va, <vsc
 ;
 ; ZVFHMIN-LABEL: vp_roundtozero_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v9, v0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v9, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a0, 1
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -261,42 +259,38 @@ declare <vscale x 8 x half> @llvm.vp.roundtozero.nxv8f16(<vscale x 8 x half>, <v
 define <vscale x 8 x half> @vp_roundtozero_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_nxv8f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vmv1r.v v10, v0
 ; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
 ; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    fsrmi a0, 1
-; ZVFH-NEXT:    vmv1r.v v0, v10
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundtozero_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v10, v0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v10, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a0, 1
-; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -347,42 +341,38 @@ declare <vscale x 16 x half> @llvm.vp.roundtozero.nxv16f16(<vscale x 16 x half>,
 define <vscale x 16 x half> @vp_roundtozero_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_nxv16f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vmv1r.v v12, v0
 ; ZVFH-NEXT:    lui a1, %hi(.LCPI8_0)
 ; ZVFH-NEXT:    flh fa5, %lo(.LCPI8_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFH-NEXT:    fsrmi a0, 1
-; ZVFH-NEXT:    vmv1r.v v0, v12
-; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundtozero_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v12, v24, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a0, 1
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
@@ -433,21 +423,19 @@ declare <vscale x 32 x half> @llvm.vp.roundtozero.nxv32f16(<vscale x 32 x half>,
 define <vscale x 32 x half> @vp_roundtozero_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_nxv32f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vmv1r.v v16, v0
 ; ZVFH-NEXT:    lui a1, %hi(.LCPI10_0)
 ; ZVFH-NEXT:    flh fa5, %lo(.LCPI10_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
 ; ZVFH-NEXT:    fsrmi a0, 1
-; ZVFH-NEXT:    vmv1r.v v0, v16
-; ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundtozero_nxv32f16:
@@ -467,21 +455,19 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16(<vscale x 32 x half> %va, <
 ; ZVFHMIN-NEXT:    and a3, a4, a3
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v17, v0, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v0, v0, a2
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    addi a2, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vfabs.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    lui a2, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a2
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a2, 1
-; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    fsrm a2
 ; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
@@ -539,12 +525,6 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16_unmasked(<vscale x 32 x hal
 ;
 ; ZVFHMIN-LABEL: vp_roundtozero_nxv32f16_unmasked:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
-; ZVFHMIN-NEXT:    sub sp, sp, a1
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
@@ -555,35 +535,29 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16_unmasked(<vscale x 32 x hal
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmset.m v16
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v16, v16, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v0, v16, a2
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v0, v16
-; ZVFHMIN-NEXT:    vfabs.v v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v24, v16, v0.t
 ; ZVFHMIN-NEXT:    lui a2, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a2
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v16, v8, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    fsrmi a2, 1
-; ZVFHMIN-NEXT:    vmv1r.v v0, v16
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
 ; ZVFHMIN-NEXT:    fsrm a2
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB11_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB11_2:
-; ZVFHMIN-NEXT:    addi a1, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfabs.v v24, v16
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5
@@ -595,10 +569,6 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16_unmasked(<vscale x 32 x hal
 ; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add sp, sp, a0
-; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 32 x half> @llvm.vp.roundtozero.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 32 x half> %v
@@ -693,21 +663,19 @@ declare <vscale x 4 x float> @llvm.vp.roundtozero.nxv4f32(<vscale x 4 x float>,
 define <vscale x 4 x float> @vp_roundtozero_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_nxv4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.roundtozero.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x float> %v
@@ -737,21 +705,19 @@ declare <vscale x 8 x float> @llvm.vp.roundtozero.nxv8f32(<vscale x 8 x float>,
 define <vscale x 8 x float> @vp_roundtozero_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_nxv8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.roundtozero.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x float> %v
@@ -781,21 +747,19 @@ declare <vscale x 16 x float> @llvm.vp.roundtozero.nxv16f32(<vscale x 16 x float
 define <vscale x 16 x float> @vp_roundtozero_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_nxv16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x float> @llvm.vp.roundtozero.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x float> %v
@@ -867,21 +831,19 @@ declare <vscale x 2 x double> @llvm.vp.roundtozero.nxv2f64(<vscale x 2 x double>
 define <vscale x 2 x double> @vp_roundtozero_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_nxv2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.roundtozero.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -911,21 +873,19 @@ declare <vscale x 4 x double> @llvm.vp.roundtozero.nxv4f64(<vscale x 4 x double>
 define <vscale x 4 x double> @vp_roundtozero_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_nxv4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.roundtozero.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
@@ -955,21 +915,19 @@ declare <vscale x 7 x double> @llvm.vp.roundtozero.nxv7f64(<vscale x 7 x double>
 define <vscale x 7 x double> @vp_roundtozero_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_nxv7f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI28_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI28_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.roundtozero.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
@@ -999,21 +957,19 @@ declare <vscale x 8 x double> @llvm.vp.roundtozero.nxv8f64(<vscale x 8 x double>
 define <vscale x 8 x double> @vp_roundtozero_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_nxv8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    lui a1, %hi(.LCPI30_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI30_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.roundtozero.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
@@ -1056,7 +1012,7 @@ define <vscale x 16 x double> @vp_roundtozero_nxv16f64(<vscale x 16 x double> %v
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    srli a2, a1, 3
 ; CHECK-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vslidedown.vx v25, v0, a2
+; CHECK-NEXT:    vslidedown.vx v0, v0, a2
 ; CHECK-NEXT:    sub a2, a0, a1
 ; CHECK-NEXT:    sltu a3, a0, a2
 ; CHECK-NEXT:    addi a3, a3, -1
@@ -1064,13 +1020,11 @@ define <vscale x 16 x double> @vp_roundtozero_nxv16f64(<vscale x 16 x double> %v
 ; CHECK-NEXT:    lui a3, %hi(.LCPI32_0)
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI32_0)(a3)
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfabs.v v8, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v25, v8, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    fsrmi a2, 1
-; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
 ; CHECK-NEXT:    fsrm a2
 ; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/select-int.ll b/llvm/test/CodeGen/RISCV/rvv/select-int.ll
index 10b77e53169d93..df6d75264e2c84 100644
--- a/llvm/test/CodeGen/RISCV/rvv/select-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/select-int.ll
@@ -133,9 +133,9 @@ define <vscale x 16 x i1> @select_nxv16i1(i1 zeroext %c, <vscale x 16 x i1> %a,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v10, a0
-; CHECK-NEXT:    vmsne.vi v9, v10, 0
-; CHECK-NEXT:    vmandn.mm v8, v8, v9
-; CHECK-NEXT:    vmand.mm v9, v0, v9
+; CHECK-NEXT:    vmsne.vi v2, v10, 0
+; CHECK-NEXT:    vmandn.mm v8, v8, v2
+; CHECK-NEXT:    vmand.mm v9, v0, v2
 ; CHECK-NEXT:    vmor.mm v0, v9, v8
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b
@@ -149,9 +149,9 @@ define <vscale x 16 x i1> @selectcc_nxv16i1(i1 signext %a, i1 signext %b, <vscal
 ; CHECK-NEXT:    andi a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v10, a0
-; CHECK-NEXT:    vmsne.vi v9, v10, 0
-; CHECK-NEXT:    vmandn.mm v8, v8, v9
-; CHECK-NEXT:    vmand.mm v9, v0, v9
+; CHECK-NEXT:    vmsne.vi v2, v10, 0
+; CHECK-NEXT:    vmandn.mm v8, v8, v2
+; CHECK-NEXT:    vmand.mm v9, v0, v2
 ; CHECK-NEXT:    vmor.mm v0, v9, v8
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i1 %a, %b
@@ -164,9 +164,9 @@ define <vscale x 32 x i1> @select_nxv32i1(i1 zeroext %c, <vscale x 32 x i1> %a,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vmsne.vi v9, v12, 0
-; CHECK-NEXT:    vmandn.mm v8, v8, v9
-; CHECK-NEXT:    vmand.mm v9, v0, v9
+; CHECK-NEXT:    vmsne.vi v4, v12, 0
+; CHECK-NEXT:    vmandn.mm v8, v8, v4
+; CHECK-NEXT:    vmand.mm v9, v0, v4
 ; CHECK-NEXT:    vmor.mm v0, v9, v8
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 32 x i1> %a, <vscale x 32 x i1> %b
@@ -180,9 +180,9 @@ define <vscale x 32 x i1> @selectcc_nxv32i1(i1 signext %a, i1 signext %b, <vscal
 ; CHECK-NEXT:    andi a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vmsne.vi v9, v12, 0
-; CHECK-NEXT:    vmandn.mm v8, v8, v9
-; CHECK-NEXT:    vmand.mm v9, v0, v9
+; CHECK-NEXT:    vmsne.vi v4, v12, 0
+; CHECK-NEXT:    vmandn.mm v8, v8, v4
+; CHECK-NEXT:    vmand.mm v9, v0, v4
 ; CHECK-NEXT:    vmor.mm v0, v9, v8
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i1 %a, %b
@@ -195,9 +195,9 @@ define <vscale x 64 x i1> @select_nxv64i1(i1 zeroext %c, <vscale x 64 x i1> %a,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m8, ta, ma
 ; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vmsne.vi v9, v16, 0
-; CHECK-NEXT:    vmandn.mm v8, v8, v9
-; CHECK-NEXT:    vmand.mm v9, v0, v9
+; CHECK-NEXT:    vmsne.vi v16, v16, 0
+; CHECK-NEXT:    vmandn.mm v8, v8, v16
+; CHECK-NEXT:    vmand.mm v9, v0, v16
 ; CHECK-NEXT:    vmor.mm v0, v9, v8
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 64 x i1> %a, <vscale x 64 x i1> %b
@@ -211,9 +211,9 @@ define <vscale x 64 x i1> @selectcc_nxv64i1(i1 signext %a, i1 signext %b, <vscal
 ; CHECK-NEXT:    andi a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m8, ta, ma
 ; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vmsne.vi v9, v16, 0
-; CHECK-NEXT:    vmandn.mm v8, v8, v9
-; CHECK-NEXT:    vmand.mm v9, v0, v9
+; CHECK-NEXT:    vmsne.vi v16, v16, 0
+; CHECK-NEXT:    vmandn.mm v8, v8, v16
+; CHECK-NEXT:    vmand.mm v9, v0, v16
 ; CHECK-NEXT:    vmor.mm v0, v9, v8
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i1 %a, %b
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
index 897bfdea69f1f4..87bfbdae461e0a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
@@ -1083,8 +1083,7 @@ define <vscale x 3 x i1> @fcmp_oeq_vv_nxv3f16(<vscale x 3 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 3 x i1> @llvm.vp.fcmp.nxv3f16(<vscale x 3 x half> %va, <vscale x 3 x half> %vb, metadata !"oeq", <vscale x 3 x i1> %m, i32 %evl)
   ret <vscale x 3 x i1> %v
@@ -1096,8 +1095,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_oeq_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfeq.vv v12, v8, v10, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v10, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_oeq_vv_nxv8f16:
@@ -1106,8 +1104,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"oeq", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1117,8 +1114,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-LABEL: fcmp_oeq_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfeq.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmfeq.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_oeq_vf_nxv8f16:
@@ -1131,8 +1127,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1144,8 +1139,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-LABEL: fcmp_oeq_vf_swap_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfeq.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmfeq.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_oeq_vf_swap_nxv8f16:
@@ -1158,8 +1152,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1171,8 +1164,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_ogt_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmflt.vv v0, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ogt_vv_nxv8f16:
@@ -1181,8 +1173,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ogt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1192,8 +1183,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-LABEL: fcmp_ogt_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ogt_vf_nxv8f16:
@@ -1206,8 +1196,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1219,8 +1208,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-LABEL: fcmp_ogt_vf_swap_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ogt_vf_swap_nxv8f16:
@@ -1233,8 +1221,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1246,8 +1233,7 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_oge_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmfle.vv v0, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_oge_vv_nxv8f16:
@@ -1256,8 +1242,7 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"oge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1267,8 +1252,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-LABEL: fcmp_oge_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_oge_vf_nxv8f16:
@@ -1281,8 +1265,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1294,8 +1277,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-LABEL: fcmp_oge_vf_swap_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_oge_vf_swap_nxv8f16:
@@ -1308,8 +1290,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v12, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1321,8 +1302,7 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_olt_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmflt.vv v0, v8, v10, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_vv_nxv8f16:
@@ -1331,8 +1311,7 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"olt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1342,8 +1321,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-LABEL: fcmp_olt_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_vf_nxv8f16:
@@ -1356,8 +1334,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1369,8 +1346,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-LABEL: fcmp_olt_vf_swap_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_vf_swap_nxv8f16:
@@ -1383,8 +1359,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1396,8 +1371,7 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_ole_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmfle.vv v0, v8, v10, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ole_vv_nxv8f16:
@@ -1406,8 +1380,7 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ole", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1417,8 +1390,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-LABEL: fcmp_ole_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ole_vf_nxv8f16:
@@ -1431,8 +1403,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v12, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1444,8 +1415,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-LABEL: fcmp_ole_vf_swap_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ole_vf_swap_nxv8f16:
@@ -1458,8 +1428,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1471,9 +1440,9 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_one_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; ZVFH-NEXT:    vmflt.vv v13, v10, v8, v0.t
-; ZVFH-NEXT:    vmor.mm v0, v13, v12
+; ZVFH-NEXT:    vmflt.vv v2, v8, v10, v0.t
+; ZVFH-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; ZVFH-NEXT:    vmor.mm v0, v0, v2
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_one_vv_nxv8f16:
@@ -1482,9 +1451,9 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v9, v12, v16, v0.t
-; ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmflt.vv v4, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vmor.mm v0, v0, v4
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"one", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1494,9 +1463,9 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-LABEL: fcmp_one_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; ZVFH-NEXT:    vmor.mm v0, v11, v10
+; ZVFH-NEXT:    vmflt.vf v2, v8, fa0, v0.t
+; ZVFH-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; ZVFH-NEXT:    vmor.mm v0, v0, v2
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_one_vf_nxv8f16:
@@ -1509,9 +1478,9 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v16, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v9, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmflt.vv v4, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmor.mm v0, v0, v4
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1523,9 +1492,9 @@ define <vscale x 8 x i1> @fcmp_one_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-LABEL: fcmp_one_vf_swap_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; ZVFH-NEXT:    vmor.mm v0, v11, v10
+; ZVFH-NEXT:    vmfgt.vf v2, v8, fa0, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; ZVFH-NEXT:    vmor.mm v0, v0, v2
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_one_vf_swap_nxv8f16:
@@ -1538,9 +1507,9 @@ define <vscale x 8 x i1> @fcmp_one_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v9, v12, v16, v0.t
-; ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmflt.vv v4, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vmor.mm v0, v0, v4
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1552,9 +1521,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_ord_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfeq.vv v12, v10, v10, v0.t
-; ZVFH-NEXT:    vmfeq.vv v10, v8, v8, v0.t
-; ZVFH-NEXT:    vmand.mm v0, v10, v12
+; ZVFH-NEXT:    vmfeq.vv v2, v10, v10, v0.t
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; ZVFH-NEXT:    vmand.mm v0, v0, v2
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ord_vv_nxv8f16:
@@ -1562,12 +1531,12 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v10, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v4, v12, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12, v0.t
-; ZVFHMIN-NEXT:    vmand.mm v0, v8, v10
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmand.mm v0, v0, v4
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ord", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1579,9 +1548,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v10, fa0
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfeq.vf v12, v10, fa0, v0.t
-; ZVFH-NEXT:    vmfeq.vv v10, v8, v8, v0.t
-; ZVFH-NEXT:    vmand.mm v0, v10, v12
+; ZVFH-NEXT:    vmfeq.vf v2, v10, fa0, v0.t
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; ZVFH-NEXT:    vmand.mm v0, v0, v2
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ord_vf_nxv8f16:
@@ -1593,12 +1562,12 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v4, v12, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v9, v12, v12, v0.t
-; ZVFHMIN-NEXT:    vmand.mm v0, v8, v9
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmand.mm v0, v4, v0
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1612,9 +1581,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v10, fa0
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfeq.vf v12, v10, fa0, v0.t
-; ZVFH-NEXT:    vmfeq.vv v10, v8, v8, v0.t
-; ZVFH-NEXT:    vmand.mm v0, v12, v10
+; ZVFH-NEXT:    vmfeq.vf v2, v10, fa0, v0.t
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; ZVFH-NEXT:    vmand.mm v0, v2, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ord_vf_swap_nxv8f16:
@@ -1626,12 +1595,12 @@ define <vscale x 8 x i1> @fcmp_ord_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v4, v12, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v9, v12, v12, v0.t
-; ZVFHMIN-NEXT:    vmand.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmand.mm v0, v0, v4
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1643,9 +1612,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_ueq_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; ZVFH-NEXT:    vmflt.vv v13, v10, v8, v0.t
-; ZVFH-NEXT:    vmnor.mm v0, v13, v12
+; ZVFH-NEXT:    vmflt.vv v2, v8, v10, v0.t
+; ZVFH-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; ZVFH-NEXT:    vmnor.mm v0, v0, v2
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ueq_vv_nxv8f16:
@@ -1654,9 +1623,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v9, v12, v16, v0.t
-; ZVFHMIN-NEXT:    vmnor.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmflt.vv v4, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vmnor.mm v0, v0, v4
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ueq", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1666,9 +1635,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-LABEL: fcmp_ueq_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; ZVFH-NEXT:    vmnor.mm v0, v11, v10
+; ZVFH-NEXT:    vmflt.vf v2, v8, fa0, v0.t
+; ZVFH-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; ZVFH-NEXT:    vmnor.mm v0, v0, v2
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ueq_vf_nxv8f16:
@@ -1681,9 +1650,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v16, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v9, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmnor.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmflt.vv v4, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmnor.mm v0, v0, v4
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1695,9 +1664,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-LABEL: fcmp_ueq_vf_swap_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; ZVFH-NEXT:    vmnor.mm v0, v11, v10
+; ZVFH-NEXT:    vmfgt.vf v2, v8, fa0, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; ZVFH-NEXT:    vmnor.mm v0, v0, v2
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ueq_vf_swap_nxv8f16:
@@ -1710,9 +1679,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v9, v12, v16, v0.t
-; ZVFHMIN-NEXT:    vmnor.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmflt.vv v4, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vmnor.mm v0, v0, v4
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1724,8 +1693,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_ugt_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v12
+; ZVFH-NEXT:    vmfle.vv v0, v8, v10, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ugt_vv_nxv8f16:
@@ -1734,8 +1703,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1745,8 +1714,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-LABEL: fcmp_ugt_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ugt_vf_nxv8f16:
@@ -1759,8 +1728,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v16, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1772,8 +1741,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-LABEL: fcmp_ugt_vf_swap_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ugt_vf_swap_nxv8f16:
@@ -1786,8 +1755,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1799,8 +1768,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_uge_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v12
+; ZVFH-NEXT:    vmflt.vv v0, v8, v10, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uge_vv_nxv8f16:
@@ -1809,8 +1778,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1820,8 +1789,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-LABEL: fcmp_uge_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uge_vf_nxv8f16:
@@ -1834,8 +1803,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v16, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1847,8 +1816,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-LABEL: fcmp_uge_vf_swap_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uge_vf_swap_nxv8f16:
@@ -1861,8 +1830,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1874,8 +1843,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_ult_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v12
+; ZVFH-NEXT:    vmfle.vv v0, v10, v8, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ult_vv_nxv8f16:
@@ -1884,8 +1853,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1895,8 +1864,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-LABEL: fcmp_ult_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ult_vf_nxv8f16:
@@ -1909,8 +1878,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1922,8 +1891,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-LABEL: fcmp_ult_vf_swap_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ult_vf_swap_nxv8f16:
@@ -1936,8 +1905,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v16, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1949,8 +1918,8 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_ule_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v12
+; ZVFH-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_vv_nxv8f16:
@@ -1959,8 +1928,8 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ule", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1970,8 +1939,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-LABEL: fcmp_ule_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_vf_nxv8f16:
@@ -1984,8 +1953,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1997,8 +1966,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-LABEL: fcmp_ule_vf_swap_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_vf_swap_nxv8f16:
@@ -2011,8 +1980,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v16, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2024,8 +1993,7 @@ define <vscale x 8 x i1> @fcmp_une_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_une_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfne.vv v12, v8, v10, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmfne.vv v0, v8, v10, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_une_vv_nxv8f16:
@@ -2034,8 +2002,7 @@ define <vscale x 8 x i1> @fcmp_une_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfne.vv v0, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"une", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2045,8 +2012,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-LABEL: fcmp_une_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfne.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmfne.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_une_vf_nxv8f16:
@@ -2059,8 +2025,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v12, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfne.vv v0, v12, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2072,8 +2037,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-LABEL: fcmp_une_vf_swap_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfne.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmfne.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_une_vf_swap_nxv8f16:
@@ -2086,8 +2050,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfne.vv v0, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2099,9 +2062,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_uno_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfne.vv v12, v10, v10, v0.t
-; ZVFH-NEXT:    vmfne.vv v10, v8, v8, v0.t
-; ZVFH-NEXT:    vmor.mm v0, v10, v12
+; ZVFH-NEXT:    vmfne.vv v2, v10, v10, v0.t
+; ZVFH-NEXT:    vmfne.vv v0, v8, v8, v0.t
+; ZVFH-NEXT:    vmor.mm v0, v0, v2
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uno_vv_nxv8f16:
@@ -2109,12 +2072,12 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v10, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmfne.vv v4, v12, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v12, v12, v0.t
-; ZVFHMIN-NEXT:    vmor.mm v0, v8, v10
+; ZVFHMIN-NEXT:    vmfne.vv v0, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmor.mm v0, v0, v4
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uno", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2126,9 +2089,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v10, fa0
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfne.vf v12, v10, fa0, v0.t
-; ZVFH-NEXT:    vmfne.vv v10, v8, v8, v0.t
-; ZVFH-NEXT:    vmor.mm v0, v10, v12
+; ZVFH-NEXT:    vmfne.vf v2, v10, fa0, v0.t
+; ZVFH-NEXT:    vmfne.vv v0, v8, v8, v0.t
+; ZVFH-NEXT:    vmor.mm v0, v0, v2
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uno_vf_nxv8f16:
@@ -2140,12 +2103,12 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmfne.vv v4, v12, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v9, v12, v12, v0.t
-; ZVFHMIN-NEXT:    vmor.mm v0, v8, v9
+; ZVFHMIN-NEXT:    vmfne.vv v0, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmor.mm v0, v4, v0
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2159,9 +2122,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v10, fa0
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfne.vf v12, v10, fa0, v0.t
-; ZVFH-NEXT:    vmfne.vv v10, v8, v8, v0.t
-; ZVFH-NEXT:    vmor.mm v0, v12, v10
+; ZVFH-NEXT:    vmfne.vf v2, v10, fa0, v0.t
+; ZVFH-NEXT:    vmfne.vv v0, v8, v8, v0.t
+; ZVFH-NEXT:    vmor.mm v0, v2, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uno_vf_swap_nxv8f16:
@@ -2173,12 +2136,12 @@ define <vscale x 8 x i1> @fcmp_uno_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmfne.vv v4, v12, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v9, v12, v12, v0.t
-; ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmfne.vv v0, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmor.mm v0, v0, v4
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2194,14 +2157,20 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
 ; ZVFH-NEXT:    addi sp, sp, -16
 ; ZVFH-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFH-NEXT:    csrr a1, vlenb
-; ZVFH-NEXT:    slli a1, a1, 4
+; ZVFH-NEXT:    li a3, 24
+; ZVFH-NEXT:    mul a1, a1, a3
 ; ZVFH-NEXT:    sub sp, sp, a1
-; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; ZVFH-NEXT:    vmv1r.v v24, v0
 ; ZVFH-NEXT:    csrr a1, vlenb
 ; ZVFH-NEXT:    slli a1, a1, 3
 ; ZVFH-NEXT:    add a1, sp, a1
 ; ZVFH-NEXT:    addi a1, a1, 16
+; ZVFH-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; ZVFH-NEXT:    csrr a1, vlenb
+; ZVFH-NEXT:    slli a1, a1, 4
+; ZVFH-NEXT:    add a1, sp, a1
+; ZVFH-NEXT:    addi a1, a1, 16
 ; ZVFH-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; ZVFH-NEXT:    csrr a1, vlenb
 ; ZVFH-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
@@ -2214,12 +2183,17 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
 ; ZVFH-NEXT:    addi a5, a5, -1
 ; ZVFH-NEXT:    and a4, a5, a4
 ; ZVFH-NEXT:    srli a1, a1, 1
-; ZVFH-NEXT:    vl8re16.v v0, (a0)
+; ZVFH-NEXT:    vl8re16.v v16, (a0)
 ; ZVFH-NEXT:    addi a0, sp, 16
-; ZVFH-NEXT:    vs8r.v v0, (a0) # Unknown-size Folded Spill
-; ZVFH-NEXT:    vslidedown.vx v0, v24, a1
+; ZVFH-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFH-NEXT:    vslidedown.vx v0, v0, a1
 ; ZVFH-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
-; ZVFH-NEXT:    vmfeq.vv v7, v16, v8, v0.t
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    add a0, sp, a0
+; ZVFH-NEXT:    addi a0, a0, 16
+; ZVFH-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFH-NEXT:    vmfeq.vv v8, v16, v8, v0.t
 ; ZVFH-NEXT:    bltu a2, a3, .LBB85_2
 ; ZVFH-NEXT:  # %bb.1:
 ; ZVFH-NEXT:    mv a2, a3
@@ -2227,19 +2201,19 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
 ; ZVFH-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
 ; ZVFH-NEXT:    vmv1r.v v0, v24
 ; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    slli a0, a0, 4
 ; ZVFH-NEXT:    add a0, sp, a0
 ; ZVFH-NEXT:    addi a0, a0, 16
-; ZVFH-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFH-NEXT:    addi a0, sp, 16
 ; ZVFH-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; ZVFH-NEXT:    vmfeq.vv v16, v8, v24, v0.t
+; ZVFH-NEXT:    addi a0, sp, 16
+; ZVFH-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFH-NEXT:    vmfeq.vv v0, v24, v16, v0.t
 ; ZVFH-NEXT:    add a0, a1, a1
 ; ZVFH-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; ZVFH-NEXT:    vslideup.vx v16, v7, a1
-; ZVFH-NEXT:    vmv.v.v v0, v16
+; ZVFH-NEXT:    vslideup.vx v0, v8, a1
 ; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 4
+; ZVFH-NEXT:    li a1, 24
+; ZVFH-NEXT:    mul a0, a0, a1
 ; ZVFH-NEXT:    add sp, sp, a0
 ; ZVFH-NEXT:    addi sp, sp, 16
 ; ZVFH-NEXT:    ret
@@ -2255,7 +2229,7 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb
 ; ZVFHMIN-NEXT:    vmv8r.v v24, v16
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    li a3, 18
+; ZVFHMIN-NEXT:    li a3, 26
 ; ZVFHMIN-NEXT:    mul a1, a1, a3
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
@@ -2284,7 +2258,7 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    srli a3, a3, 2
 ; ZVFHMIN-NEXT:    vl8re16.v v8, (a0)
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li t0, 26
+; ZVFHMIN-NEXT:    li t0, 18
 ; ZVFHMIN-NEXT:    mul a0, a0, t0
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
@@ -2309,7 +2283,7 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28
 ; ZVFHMIN-NEXT:    vsetvli zero, a7, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v26, v16, v8, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v24, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    bltu a6, a4, .LBB85_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a6, a4
@@ -2331,14 +2305,20 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    vsetvli zero, a6, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl1r.v v0, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vmfeq.vv v6, v16, v8, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    add a0, a3, a3
 ; ZVFHMIN-NEXT:    bltu a2, a5, .LBB85_4
 ; ZVFHMIN-NEXT:  # %bb.3:
 ; ZVFHMIN-NEXT:    mv a2, a5
 ; ZVFHMIN-NEXT:  .LBB85_4:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslideup.vx v6, v26, a3
+; ZVFHMIN-NEXT:    vslideup.vx v0, v24, a3
+; ZVFHMIN-NEXT:    csrr a5, vlenb
+; ZVFHMIN-NEXT:    li a6, 10
+; ZVFHMIN-NEXT:    mul a5, a5, a6
+; ZVFHMIN-NEXT:    add a5, sp, a5
+; ZVFHMIN-NEXT:    addi a5, a5, 16
+; ZVFHMIN-NEXT:    vs1r.v v0, (a5) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    sub a5, a2, a4
 ; ZVFHMIN-NEXT:    sltu a6, a2, a5
 ; ZVFHMIN-NEXT:    addi a6, a6, -1
@@ -2352,39 +2332,31 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    vslidedown.vx v0, v8, a3
 ; ZVFHMIN-NEXT:    vsetvli a6, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    csrr a6, vlenb
-; ZVFHMIN-NEXT:    li a7, 18
+; ZVFHMIN-NEXT:    li a7, 26
 ; ZVFHMIN-NEXT:    mul a6, a6, a7
 ; ZVFHMIN-NEXT:    add a6, sp, a6
 ; ZVFHMIN-NEXT:    addi a6, a6, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a6) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28
 ; ZVFHMIN-NEXT:    csrr a6, vlenb
-; ZVFHMIN-NEXT:    li a7, 10
-; ZVFHMIN-NEXT:    mul a6, a6, a7
-; ZVFHMIN-NEXT:    add a6, sp, a6
-; ZVFHMIN-NEXT:    addi a6, a6, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a6) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT:    csrr a6, vlenb
-; ZVFHMIN-NEXT:    li a7, 26
+; ZVFHMIN-NEXT:    li a7, 18
 ; ZVFHMIN-NEXT:    mul a6, a6, a7
 ; ZVFHMIN-NEXT:    add a6, sp, a6
 ; ZVFHMIN-NEXT:    addi a6, a6, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a6) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20
 ; ZVFHMIN-NEXT:    vsetvli zero, a5, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v24, v0.t
 ; ZVFHMIN-NEXT:    csrr a5, vlenb
-; ZVFHMIN-NEXT:    li a6, 10
-; ZVFHMIN-NEXT:    mul a5, a5, a6
+; ZVFHMIN-NEXT:    slli a5, a5, 1
 ; ZVFHMIN-NEXT:    add a5, sp, a5
 ; ZVFHMIN-NEXT:    addi a5, a5, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a5) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT:    vmfeq.vv v5, v16, v8, v0.t
+; ZVFHMIN-NEXT:    vs1r.v v0, (a5) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    bltu a2, a4, .LBB85_6
 ; ZVFHMIN-NEXT:  # %bb.5:
 ; ZVFHMIN-NEXT:    mv a2, a4
 ; ZVFHMIN-NEXT:  .LBB85_6:
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
 ; ZVFHMIN-NEXT:    csrr a4, vlenb
 ; ZVFHMIN-NEXT:    li a5, 26
 ; ZVFHMIN-NEXT:    mul a4, a4, a5
@@ -2392,15 +2364,32 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    addi a4, a4, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a4) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT:    csrr a4, vlenb
+; ZVFHMIN-NEXT:    li a5, 18
+; ZVFHMIN-NEXT:    mul a4, a4, a5
+; ZVFHMIN-NEXT:    add a4, sp, a4
+; ZVFHMIN-NEXT:    addi a4, a4, 16
+; ZVFHMIN-NEXT:    vl8r.v v16, (a4) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v24, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslideup.vx v8, v5, a3
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 1
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vslideup.vx v0, v8, a3
 ; ZVFHMIN-NEXT:    add a0, a1, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; ZVFHMIN-NEXT:    vslideup.vx v8, v6, a1
-; ZVFHMIN-NEXT:    vmv.v.v v0, v8
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    li a2, 10
+; ZVFHMIN-NEXT:    mul a0, a0, a2
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vslideup.vx v0, v8, a1
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    li a1, 34
 ; ZVFHMIN-NEXT:    mul a0, a0, a1
@@ -2939,8 +2928,7 @@ define <vscale x 3 x i1> @fcmp_oeq_vv_nxv3f64(<vscale x 3 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_oeq_vv_nxv3f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 3 x i1> @llvm.vp.fcmp.nxv3f64(<vscale x 3 x double> %va, <vscale x 3 x double> %vb, metadata !"oeq", <vscale x 3 x i1> %m, i32 %evl)
   ret <vscale x 3 x i1> %v
@@ -2952,8 +2940,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_oeq_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"oeq", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2963,8 +2950,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_oeq_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -2976,8 +2962,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-LABEL: fcmp_oeq_vf_swap_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -2989,8 +2974,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ogt_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ogt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3000,8 +2984,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ogt_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3013,8 +2996,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-LABEL: fcmp_ogt_vf_swap_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3026,8 +3008,7 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_oge_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"oge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3037,8 +3018,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_oge_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3050,8 +3030,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-LABEL: fcmp_oge_vf_swap_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3063,8 +3042,7 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_olt_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmflt.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"olt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3074,8 +3052,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_olt_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3087,8 +3064,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-LABEL: fcmp_olt_vf_swap_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3100,8 +3076,7 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ole_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmfle.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ole", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3111,8 +3086,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ole_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3124,8 +3098,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-LABEL: fcmp_ole_vf_swap_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3138,8 +3111,8 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmflt.vv v25, v16, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v24
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"one", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3150,8 +3123,8 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3164,8 +3137,8 @@ define <vscale x 8 x i1> @fcmp_one_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3177,9 +3150,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ord_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16, v0.t
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8, v0.t
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vv v16, v16, v16, v0.t
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; CHECK-NEXT:    vmand.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ord", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3191,9 +3164,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0, v0.t
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8, v0.t
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0, v0.t
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; CHECK-NEXT:    vmand.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3207,9 +3180,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0, v0.t
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8, v0.t
-; CHECK-NEXT:    vmand.mm v0, v24, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0, v0.t
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; CHECK-NEXT:    vmand.mm v0, v16, v0
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3222,8 +3195,8 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmflt.vv v25, v16, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v24
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ueq", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3234,8 +3207,8 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3248,8 +3221,8 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3261,8 +3234,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ugt_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfle.vv v0, v8, v16, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3272,8 +3245,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ugt_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3285,8 +3258,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-LABEL: fcmp_ugt_vf_swap_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3298,8 +3271,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_uge_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmflt.vv v0, v8, v16, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3309,8 +3282,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_uge_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3322,8 +3295,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-LABEL: fcmp_uge_vf_swap_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3335,8 +3308,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ult_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3346,8 +3319,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ult_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3359,8 +3332,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-LABEL: fcmp_ult_vf_swap_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3372,8 +3345,8 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ule_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ule", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3383,8 +3356,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ule_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3396,8 +3369,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-LABEL: fcmp_ule_vf_swap_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3409,8 +3382,7 @@ define <vscale x 8 x i1> @fcmp_une_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_une_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmfne.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"une", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3420,8 +3392,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_une_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3433,8 +3404,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-LABEL: fcmp_une_vf_swap_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3446,9 +3416,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_uno_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vv v24, v16, v16, v0.t
-; CHECK-NEXT:    vmfne.vv v16, v8, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vv v16, v16, v16, v0.t
+; CHECK-NEXT:    vmfne.vv v0, v8, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uno", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3460,9 +3430,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vf v24, v16, fa0, v0.t
-; CHECK-NEXT:    vmfne.vv v16, v8, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vf v16, v16, fa0, v0.t
+; CHECK-NEXT:    vmfne.vv v0, v8, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3476,9 +3446,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vf v24, v16, fa0, v0.t
-; CHECK-NEXT:    vmfne.vv v16, v8, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v24, v16
+; CHECK-NEXT:    vmfne.vf v16, v16, fa0, v0.t
+; CHECK-NEXT:    vmfne.vv v0, v8, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v16, v0
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3494,19 +3464,20 @@ define <vscale x 32 x i1> @fcmp_oeq_vv_nxv32f64(<vscale x 32 x double> %va, <vsc
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    li a3, 48
+; CHECK-NEXT:    li a3, 42
 ; CHECK-NEXT:    mul a1, a1, a3
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb
 ; CHECK-NEXT:    vmv1r.v v24, v0
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 5
+; CHECK-NEXT:    li a3, 25
+; CHECK-NEXT:    mul a1, a1, a3
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    li a3, 40
-; CHECK-NEXT:    mul a1, a1, a3
+; CHECK-NEXT:    slli a3, a1, 5
+; CHECK-NEXT:    add a1, a3, a1
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
@@ -3520,7 +3491,7 @@ define <vscale x 32 x i1> @fcmp_oeq_vv_nxv32f64(<vscale x 32 x double> %va, <vsc
 ; CHECK-NEXT:    srli a1, a3, 3
 ; CHECK-NEXT:    vsetvli a5, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    add a5, a2, t1
-; CHECK-NEXT:    vl8re64.v v8, (a5)
+; CHECK-NEXT:    vl8re64.v v16, (a5)
 ; CHECK-NEXT:    slli t0, a3, 4
 ; CHECK-NEXT:    slli a5, a3, 1
 ; CHECK-NEXT:    vslidedown.vx v0, v0, a1
@@ -3532,72 +3503,81 @@ define <vscale x 32 x i1> @fcmp_oeq_vv_nxv32f64(<vscale x 32 x double> %va, <vsc
 ; CHECK-NEXT:    add t2, a2, t2
 ; CHECK-NEXT:    add t1, a0, t1
 ; CHECK-NEXT:    add t0, a2, t0
-; CHECK-NEXT:    vl8re64.v v16, (a2)
+; CHECK-NEXT:    vl8re64.v v8, (a2)
 ; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 4
+; CHECK-NEXT:    slli t3, a2, 3
+; CHECK-NEXT:    add a2, t3, a2
 ; CHECK-NEXT:    add a2, sp, a2
 ; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    sub a2, a7, a3
 ; CHECK-NEXT:    sltu t3, a7, a2
 ; CHECK-NEXT:    addi t3, t3, -1
 ; CHECK-NEXT:    and a2, t3, a2
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 5
+; CHECK-NEXT:    li t3, 25
+; CHECK-NEXT:    mul a2, a2, t3
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmfeq.vv v0, v8, v16, v0.t
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 3
 ; CHECK-NEXT:    add a2, sp, a2
 ; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vv v6, v16, v8, v0.t
+; CHECK-NEXT:    vs1r.v v0, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    bltu a7, a3, .LBB171_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    mv a7, a3
 ; CHECK-NEXT:  .LBB171_4:
-; CHECK-NEXT:    vl8re64.v v8, (t2)
+; CHECK-NEXT:    vl8re64.v v16, (t2)
 ; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 5
+; CHECK-NEXT:    li t2, 25
+; CHECK-NEXT:    mul a2, a2, t2
 ; CHECK-NEXT:    add a2, sp, a2
 ; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT:    vl8re64.v v8, (t1)
+; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    vl8re64.v v16, (t1)
 ; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    li t1, 24
-; CHECK-NEXT:    mul a2, a2, t1
+; CHECK-NEXT:    slli t1, a2, 4
+; CHECK-NEXT:    add a2, t1, a2
 ; CHECK-NEXT:    add a2, sp, a2
 ; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vslidedown.vx v18, v7, a1
+; CHECK-NEXT:    vslidedown.vx v6, v7, a1
 ; CHECK-NEXT:    vl8re64.v v8, (t0)
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    addi a2, sp, 16
 ; CHECK-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT:    vl8re64.v v8, (a0)
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vl8re64.v v16, (a0)
 ; CHECK-NEXT:    vsetvli zero, a7, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a2, 40
-; CHECK-NEXT:    mul a0, a0, a2
+; CHECK-NEXT:    slli a2, a0, 5
+; CHECK-NEXT:    add a0, a2, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a2, a0, 3
+; CHECK-NEXT:    add a0, a2, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vv v17, v24, v8, v0.t
+; CHECK-NEXT:    vmfeq.vv v8, v24, v8, v0.t
 ; CHECK-NEXT:    add a0, a1, a1
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, ma
 ; CHECK-NEXT:    sub a0, a6, a5
 ; CHECK-NEXT:    sltu a2, a6, a0
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    and a0, a2, a0
-; CHECK-NEXT:    vslideup.vx v17, v6, a1
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 3
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vl1r.v v0, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT:    vslideup.vx v8, v0, a1
 ; CHECK-NEXT:    mv a2, a0
 ; CHECK-NEXT:    bltu a0, a3, .LBB171_6
 ; CHECK-NEXT:  # %bb.5:
@@ -3605,43 +3585,39 @@ define <vscale x 32 x i1> @fcmp_oeq_vv_nxv32f64(<vscale x 32 x double> %va, <vsc
 ; CHECK-NEXT:  .LBB171_6:
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
 ; CHECK-NEXT:    addi a2, sp, 16
 ; CHECK-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vv v16, v24, v8, v0.t
+; CHECK-NEXT:    vmfeq.vv v0, v16, v24, v0.t
 ; CHECK-NEXT:    add a2, a4, a1
 ; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vx v17, v16, a4
+; CHECK-NEXT:    vslideup.vx v8, v0, a4
 ; CHECK-NEXT:    sub a2, a0, a3
 ; CHECK-NEXT:    sltu a0, a0, a2
 ; CHECK-NEXT:    addi a0, a0, -1
 ; CHECK-NEXT:    and a0, a0, a2
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v18
+; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 5
+; CHECK-NEXT:    li a2, 25
+; CHECK-NEXT:    mul a0, a0, a2
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a2, 24
-; CHECK-NEXT:    mul a0, a0, a2
+; CHECK-NEXT:    slli a2, a0, 4
+; CHECK-NEXT:    add a0, a2, a0
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vv v16, v8, v24, v0.t
+; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmfeq.vv v0, v24, v16, v0.t
 ; CHECK-NEXT:    slli a0, a1, 1
 ; CHECK-NEXT:    add a0, a0, a1
 ; CHECK-NEXT:    add a1, a0, a1
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vx v17, v16, a0
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vslideup.vx v8, v0, a0
+; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 48
+; CHECK-NEXT:    li a1, 42
 ; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
index aee255196ce2e8..5d14143b9b1e59 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
@@ -580,9 +580,9 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_one_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vv v12, v8, v10
-; ZVFH-NEXT:    vmflt.vv v13, v10, v8
-; ZVFH-NEXT:    vmor.mm v0, v13, v12
+; ZVFH-NEXT:    vmflt.vv v0, v8, v10
+; ZVFH-NEXT:    vmflt.vv v2, v10, v8
+; ZVFH-NEXT:    vmor.mm v0, v2, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_one_vv_nxv8f16:
@@ -591,9 +591,9 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12
-; ZVFHMIN-NEXT:    vmflt.vv v9, v12, v16
-; ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmflt.vv v4, v12, v16
+; ZVFHMIN-NEXT:    vmor.mm v0, v4, v0
 ; ZVFHMIN-NEXT:    ret
   %vc = fcmp one <vscale x 8 x half> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -603,9 +603,9 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH-LABEL: fcmp_one_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vf v10, v8, fa0
-; ZVFH-NEXT:    vmfgt.vf v11, v8, fa0
-; ZVFH-NEXT:    vmor.mm v0, v11, v10
+; ZVFH-NEXT:    vmflt.vf v0, v8, fa0
+; ZVFH-NEXT:    vmfgt.vf v2, v8, fa0
+; ZVFH-NEXT:    vmor.mm v0, v2, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_one_vf_nxv8f16:
@@ -618,9 +618,9 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v16
-; ZVFHMIN-NEXT:    vmflt.vv v9, v16, v12
-; ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v16
+; ZVFHMIN-NEXT:    vmflt.vv v4, v16, v12
+; ZVFHMIN-NEXT:    vmor.mm v0, v4, v0
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -632,9 +632,9 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH-LABEL: fcmp_one_fv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0
-; ZVFH-NEXT:    vmflt.vf v11, v8, fa0
-; ZVFH-NEXT:    vmor.mm v0, v11, v10
+; ZVFH-NEXT:    vmfgt.vf v0, v8, fa0
+; ZVFH-NEXT:    vmflt.vf v2, v8, fa0
+; ZVFH-NEXT:    vmor.mm v0, v2, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_one_fv_nxv8f16:
@@ -647,9 +647,9 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12
-; ZVFHMIN-NEXT:    vmflt.vv v9, v12, v16
-; ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmflt.vv v4, v12, v16
+; ZVFHMIN-NEXT:    vmor.mm v0, v4, v0
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -705,9 +705,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_ord_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfeq.vv v12, v10, v10
-; ZVFH-NEXT:    vmfeq.vv v10, v8, v8
-; ZVFH-NEXT:    vmand.mm v0, v10, v12
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFH-NEXT:    vmfeq.vv v2, v8, v8
+; ZVFH-NEXT:    vmand.mm v0, v2, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ord_vv_nxv8f16:
@@ -715,12 +715,12 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v10, v12, v12
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12
-; ZVFHMIN-NEXT:    vmand.mm v0, v8, v10
+; ZVFHMIN-NEXT:    vmfeq.vv v4, v12, v12
+; ZVFHMIN-NEXT:    vmand.mm v0, v4, v0
 ; ZVFHMIN-NEXT:    ret
   %vc = fcmp ord <vscale x 8 x half> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -731,9 +731,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v10, fa0
-; ZVFH-NEXT:    vmfeq.vf v12, v10, fa0
-; ZVFH-NEXT:    vmfeq.vv v10, v8, v8
-; ZVFH-NEXT:    vmand.mm v0, v10, v12
+; ZVFH-NEXT:    vmfeq.vf v0, v10, fa0
+; ZVFH-NEXT:    vmfeq.vv v2, v8, v8
+; ZVFH-NEXT:    vmand.mm v0, v2, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ord_vf_nxv8f16:
@@ -745,12 +745,12 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v9, v12, v12
-; ZVFHMIN-NEXT:    vmand.mm v0, v8, v9
+; ZVFHMIN-NEXT:    vmfeq.vv v4, v12, v12
+; ZVFHMIN-NEXT:    vmand.mm v0, v0, v4
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -763,9 +763,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v10, fa0
-; ZVFH-NEXT:    vmfeq.vf v12, v10, fa0
-; ZVFH-NEXT:    vmfeq.vv v10, v8, v8
-; ZVFH-NEXT:    vmand.mm v0, v12, v10
+; ZVFH-NEXT:    vmfeq.vf v0, v10, fa0
+; ZVFH-NEXT:    vmfeq.vv v2, v8, v8
+; ZVFH-NEXT:    vmand.mm v0, v0, v2
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ord_fv_nxv8f16:
@@ -777,12 +777,12 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v9, v12, v12
-; ZVFHMIN-NEXT:    vmand.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v4, v12, v12
+; ZVFHMIN-NEXT:    vmand.mm v0, v4, v0
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -794,9 +794,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16_nonans(<vscale x 8 x half> %va, <v
 ; ZVFH-LABEL: fcmp_ord_vv_nxv8f16_nonans:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfeq.vv v12, v10, v10
-; ZVFH-NEXT:    vmfeq.vv v10, v8, v8
-; ZVFH-NEXT:    vmand.mm v0, v10, v12
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFH-NEXT:    vmfeq.vv v2, v8, v8
+; ZVFH-NEXT:    vmand.mm v0, v2, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ord_vv_nxv8f16_nonans:
@@ -804,12 +804,12 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16_nonans(<vscale x 8 x half> %va, <v
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v10, v12, v12
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12
-; ZVFHMIN-NEXT:    vmand.mm v0, v8, v10
+; ZVFHMIN-NEXT:    vmfeq.vv v4, v12, v12
+; ZVFHMIN-NEXT:    vmand.mm v0, v4, v0
 ; ZVFHMIN-NEXT:    ret
   %vc = fcmp ord <vscale x 8 x half> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -820,9 +820,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v10, fa0
-; ZVFH-NEXT:    vmfeq.vf v12, v10, fa0
-; ZVFH-NEXT:    vmfeq.vv v10, v8, v8
-; ZVFH-NEXT:    vmand.mm v0, v10, v12
+; ZVFH-NEXT:    vmfeq.vf v0, v10, fa0
+; ZVFH-NEXT:    vmfeq.vv v2, v8, v8
+; ZVFH-NEXT:    vmand.mm v0, v2, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ord_vf_nxv8f16_nonans:
@@ -834,12 +834,12 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v9, v12, v12
-; ZVFHMIN-NEXT:    vmand.mm v0, v8, v9
+; ZVFHMIN-NEXT:    vmfeq.vv v4, v12, v12
+; ZVFHMIN-NEXT:    vmand.mm v0, v0, v4
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -851,9 +851,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_ueq_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vv v12, v8, v10
-; ZVFH-NEXT:    vmflt.vv v13, v10, v8
-; ZVFH-NEXT:    vmnor.mm v0, v13, v12
+; ZVFH-NEXT:    vmflt.vv v0, v8, v10
+; ZVFH-NEXT:    vmflt.vv v2, v10, v8
+; ZVFH-NEXT:    vmnor.mm v0, v2, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ueq_vv_nxv8f16:
@@ -862,9 +862,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12
-; ZVFHMIN-NEXT:    vmflt.vv v9, v12, v16
-; ZVFHMIN-NEXT:    vmnor.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmflt.vv v4, v12, v16
+; ZVFHMIN-NEXT:    vmnor.mm v0, v4, v0
 ; ZVFHMIN-NEXT:    ret
   %vc = fcmp ueq <vscale x 8 x half> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -874,9 +874,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH-LABEL: fcmp_ueq_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vf v10, v8, fa0
-; ZVFH-NEXT:    vmfgt.vf v11, v8, fa0
-; ZVFH-NEXT:    vmnor.mm v0, v11, v10
+; ZVFH-NEXT:    vmflt.vf v0, v8, fa0
+; ZVFH-NEXT:    vmfgt.vf v2, v8, fa0
+; ZVFH-NEXT:    vmnor.mm v0, v2, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ueq_vf_nxv8f16:
@@ -889,9 +889,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v16
-; ZVFHMIN-NEXT:    vmflt.vv v9, v16, v12
-; ZVFHMIN-NEXT:    vmnor.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v16
+; ZVFHMIN-NEXT:    vmflt.vv v4, v16, v12
+; ZVFHMIN-NEXT:    vmnor.mm v0, v4, v0
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -903,9 +903,9 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH-LABEL: fcmp_ueq_fv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0
-; ZVFH-NEXT:    vmflt.vf v11, v8, fa0
-; ZVFH-NEXT:    vmnor.mm v0, v11, v10
+; ZVFH-NEXT:    vmfgt.vf v0, v8, fa0
+; ZVFH-NEXT:    vmflt.vf v2, v8, fa0
+; ZVFH-NEXT:    vmnor.mm v0, v2, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ueq_fv_nxv8f16:
@@ -918,9 +918,9 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12
-; ZVFHMIN-NEXT:    vmflt.vv v9, v12, v16
-; ZVFHMIN-NEXT:    vmnor.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmflt.vv v4, v12, v16
+; ZVFHMIN-NEXT:    vmnor.mm v0, v4, v0
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -976,8 +976,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_ugt_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vv v12, v8, v10
-; ZVFH-NEXT:    vmnot.m v0, v12
+; ZVFH-NEXT:    vmfle.vv v0, v8, v10
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ugt_vv_nxv8f16:
@@ -986,8 +986,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v12
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %vc = fcmp ugt <vscale x 8 x half> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -997,8 +997,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH-LABEL: fcmp_ugt_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vf v10, v8, fa0
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfle.vf v0, v8, fa0
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ugt_vf_nxv8f16:
@@ -1011,8 +1011,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v16
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v12, v16
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1024,8 +1024,8 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH-LABEL: fcmp_ugt_fv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfge.vf v10, v8, fa0
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfge.vf v0, v8, fa0
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ugt_fv_nxv8f16:
@@ -1038,8 +1038,8 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v12
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1095,8 +1095,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_uge_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vv v12, v8, v10
-; ZVFH-NEXT:    vmnot.m v0, v12
+; ZVFH-NEXT:    vmflt.vv v0, v8, v10
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uge_vv_nxv8f16:
@@ -1105,8 +1105,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %vc = fcmp uge <vscale x 8 x half> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -1116,8 +1116,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH-LABEL: fcmp_uge_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vf v10, v8, fa0
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmflt.vf v0, v8, fa0
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uge_vf_nxv8f16:
@@ -1130,8 +1130,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v16
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v16
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1143,8 +1143,8 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH-LABEL: fcmp_uge_fv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfgt.vf v0, v8, fa0
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uge_fv_nxv8f16:
@@ -1157,8 +1157,8 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1214,8 +1214,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_ult_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vv v12, v10, v8
-; ZVFH-NEXT:    vmnot.m v0, v12
+; ZVFH-NEXT:    vmfle.vv v0, v10, v8
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ult_vv_nxv8f16:
@@ -1224,8 +1224,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v12
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %vc = fcmp ult <vscale x 8 x half> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -1235,8 +1235,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH-LABEL: fcmp_ult_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfge.vf v10, v8, fa0
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfge.vf v0, v8, fa0
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ult_vf_nxv8f16:
@@ -1249,8 +1249,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v12
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1262,8 +1262,8 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH-LABEL: fcmp_ult_fv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vf v10, v8, fa0
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfle.vf v0, v8, fa0
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ult_fv_nxv8f16:
@@ -1276,8 +1276,8 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v16
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v12, v16
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1333,8 +1333,8 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_ule_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vv v12, v10, v8
-; ZVFH-NEXT:    vmnot.m v0, v12
+; ZVFH-NEXT:    vmflt.vv v0, v10, v8
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_vv_nxv8f16:
@@ -1343,8 +1343,8 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %vc = fcmp ule <vscale x 8 x half> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -1354,8 +1354,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH-LABEL: fcmp_ule_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfgt.vf v0, v8, fa0
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_vf_nxv8f16:
@@ -1368,8 +1368,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1381,8 +1381,8 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH-LABEL: fcmp_ule_fv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vf v10, v8, fa0
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmflt.vf v0, v8, fa0
+; ZVFH-NEXT:    vmnot.m v0, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_fv_nxv8f16:
@@ -1395,8 +1395,8 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v16
-; ZVFHMIN-NEXT:    vmnot.m v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v16
+; ZVFHMIN-NEXT:    vmnot.m v0, v0
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1565,9 +1565,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_uno_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfne.vv v12, v10, v10
-; ZVFH-NEXT:    vmfne.vv v10, v8, v8
-; ZVFH-NEXT:    vmor.mm v0, v10, v12
+; ZVFH-NEXT:    vmfne.vv v0, v10, v10
+; ZVFH-NEXT:    vmfne.vv v2, v8, v8
+; ZVFH-NEXT:    vmor.mm v0, v2, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uno_vv_nxv8f16:
@@ -1575,12 +1575,12 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v10, v12, v12
+; ZVFHMIN-NEXT:    vmfne.vv v0, v12, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v12, v12
-; ZVFHMIN-NEXT:    vmor.mm v0, v8, v10
+; ZVFHMIN-NEXT:    vmfne.vv v4, v12, v12
+; ZVFHMIN-NEXT:    vmor.mm v0, v4, v0
 ; ZVFHMIN-NEXT:    ret
   %vc = fcmp uno <vscale x 8 x half> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -1591,9 +1591,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v10, fa0
-; ZVFH-NEXT:    vmfne.vf v12, v10, fa0
-; ZVFH-NEXT:    vmfne.vv v10, v8, v8
-; ZVFH-NEXT:    vmor.mm v0, v10, v12
+; ZVFH-NEXT:    vmfne.vf v0, v10, fa0
+; ZVFH-NEXT:    vmfne.vv v2, v8, v8
+; ZVFH-NEXT:    vmor.mm v0, v2, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uno_vf_nxv8f16:
@@ -1605,12 +1605,12 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v12, v12
+; ZVFHMIN-NEXT:    vmfne.vv v0, v12, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v9, v12, v12
-; ZVFHMIN-NEXT:    vmor.mm v0, v8, v9
+; ZVFHMIN-NEXT:    vmfne.vv v4, v12, v12
+; ZVFHMIN-NEXT:    vmor.mm v0, v0, v4
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1623,9 +1623,9 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v10, fa0
-; ZVFH-NEXT:    vmfne.vf v12, v10, fa0
-; ZVFH-NEXT:    vmfne.vv v10, v8, v8
-; ZVFH-NEXT:    vmor.mm v0, v12, v10
+; ZVFH-NEXT:    vmfne.vf v0, v10, fa0
+; ZVFH-NEXT:    vmfne.vv v2, v8, v8
+; ZVFH-NEXT:    vmor.mm v0, v0, v2
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uno_fv_nxv8f16:
@@ -1637,12 +1637,12 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v12, v12
+; ZVFHMIN-NEXT:    vmfne.vv v0, v12, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v9, v12, v12
-; ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
+; ZVFHMIN-NEXT:    vmfne.vv v4, v12, v12
+; ZVFHMIN-NEXT:    vmor.mm v0, v4, v0
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1654,9 +1654,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16_nonans(<vscale x 8 x half> %va, <v
 ; ZVFH-LABEL: fcmp_uno_vv_nxv8f16_nonans:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfne.vv v12, v10, v10
-; ZVFH-NEXT:    vmfne.vv v10, v8, v8
-; ZVFH-NEXT:    vmor.mm v0, v10, v12
+; ZVFH-NEXT:    vmfne.vv v0, v10, v10
+; ZVFH-NEXT:    vmfne.vv v2, v8, v8
+; ZVFH-NEXT:    vmor.mm v0, v2, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uno_vv_nxv8f16_nonans:
@@ -1664,12 +1664,12 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16_nonans(<vscale x 8 x half> %va, <v
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v10, v12, v12
+; ZVFHMIN-NEXT:    vmfne.vv v0, v12, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v12, v12
-; ZVFHMIN-NEXT:    vmor.mm v0, v8, v10
+; ZVFHMIN-NEXT:    vmfne.vv v4, v12, v12
+; ZVFHMIN-NEXT:    vmor.mm v0, v4, v0
 ; ZVFHMIN-NEXT:    ret
   %vc = fcmp uno <vscale x 8 x half> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -1680,9 +1680,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v10, fa0
-; ZVFH-NEXT:    vmfne.vf v12, v10, fa0
-; ZVFH-NEXT:    vmfne.vv v10, v8, v8
-; ZVFH-NEXT:    vmor.mm v0, v10, v12
+; ZVFH-NEXT:    vmfne.vf v0, v10, fa0
+; ZVFH-NEXT:    vmfne.vv v2, v8, v8
+; ZVFH-NEXT:    vmor.mm v0, v2, v0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uno_vf_nxv8f16_nonans:
@@ -1694,12 +1694,12 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v12, v12
+; ZVFHMIN-NEXT:    vmfne.vv v0, v12, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v9, v12, v12
-; ZVFHMIN-NEXT:    vmor.mm v0, v8, v9
+; ZVFHMIN-NEXT:    vmfne.vv v4, v12, v12
+; ZVFHMIN-NEXT:    vmor.mm v0, v0, v4
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1991,9 +1991,9 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_one_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
+; CHECK-NEXT:    vmflt.vv v4, v12, v8
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %vc = fcmp one <vscale x 8 x float> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -2003,9 +2003,9 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK-LABEL: fcmp_one_vf_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -2017,9 +2017,9 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK-LABEL: fcmp_one_fv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -2053,9 +2053,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ord_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %vc = fcmp ord <vscale x 8 x float> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -2066,9 +2066,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -2081,9 +2081,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -2095,9 +2095,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f32_nonans(<vscale x 8 x float> %va, <
 ; CHECK-LABEL: fcmp_ord_vv_nxv8f32_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %vc = fcmp ord <vscale x 8 x float> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -2108,9 +2108,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f32_nonans(<vscale x 8 x float> %va, f
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -2122,9 +2122,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ueq_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
+; CHECK-NEXT:    vmflt.vv v4, v12, v8
+; CHECK-NEXT:    vmnor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %vc = fcmp ueq <vscale x 8 x float> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -2134,9 +2134,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK-LABEL: fcmp_ueq_vf_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -2148,9 +2148,9 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK-LABEL: fcmp_ueq_fv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -2184,8 +2184,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ugt_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v0, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %vc = fcmp ugt <vscale x 8 x float> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -2195,8 +2195,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK-LABEL: fcmp_ugt_vf_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -2208,8 +2208,8 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK-LABEL: fcmp_ugt_fv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -2243,8 +2243,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_uge_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %vc = fcmp uge <vscale x 8 x float> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -2254,8 +2254,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK-LABEL: fcmp_uge_vf_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -2267,8 +2267,8 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK-LABEL: fcmp_uge_fv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -2302,8 +2302,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ult_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %vc = fcmp ult <vscale x 8 x float> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -2313,8 +2313,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK-LABEL: fcmp_ult_vf_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -2326,8 +2326,8 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK-LABEL: fcmp_ult_fv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -2361,8 +2361,8 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ule_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v0, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %vc = fcmp ule <vscale x 8 x float> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -2372,8 +2372,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK-LABEL: fcmp_ule_vf_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -2385,8 +2385,8 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK-LABEL: fcmp_ule_fv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -2476,9 +2476,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_uno_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v16, v12, v12
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vv v0, v12, v12
+; CHECK-NEXT:    vmfne.vv v4, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %vc = fcmp uno <vscale x 8 x float> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -2489,9 +2489,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vf v0, v12, fa0
+; CHECK-NEXT:    vmfne.vv v4, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -2504,9 +2504,9 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v12
+; CHECK-NEXT:    vmfne.vf v0, v12, fa0
+; CHECK-NEXT:    vmfne.vv v4, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -2518,9 +2518,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f32_nonans(<vscale x 8 x float> %va, <
 ; CHECK-LABEL: fcmp_uno_vv_nxv8f32_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v16, v12, v12
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vv v0, v12, v12
+; CHECK-NEXT:    vmfne.vv v4, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %vc = fcmp uno <vscale x 8 x float> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -2531,9 +2531,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f32_nonans(<vscale x 8 x float> %va, f
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vf v0, v12, fa0
+; CHECK-NEXT:    vmfne.vv v4, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -2825,9 +2825,9 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_one_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmflt.vv v25, v16, v8
-; CHECK-NEXT:    vmor.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v8, v16
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %vc = fcmp one <vscale x 8 x double> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -2837,9 +2837,9 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_one_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -2851,9 +2851,9 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_one_fv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -2887,9 +2887,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ord_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %vc = fcmp ord <vscale x 8 x double> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -2900,9 +2900,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -2915,9 +2915,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v24, v16
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -2929,9 +2929,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f64_nonans(<vscale x 8 x double> %va,
 ; CHECK-LABEL: fcmp_ord_vv_nxv8f64_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %vc = fcmp ord <vscale x 8 x double> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -2942,9 +2942,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f64_nonans(<vscale x 8 x double> %va,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -2956,9 +2956,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ueq_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmflt.vv v25, v16, v8
-; CHECK-NEXT:    vmnor.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v8, v16
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmnor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %vc = fcmp ueq <vscale x 8 x double> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -2968,9 +2968,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ueq_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -2982,9 +2982,9 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ueq_fv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3018,8 +3018,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ugt_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v8, v16
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfle.vv v0, v8, v16
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %vc = fcmp ugt <vscale x 8 x double> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -3029,8 +3029,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ugt_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3042,8 +3042,8 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ugt_fv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3077,8 +3077,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_uge_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmflt.vv v0, v8, v16
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %vc = fcmp uge <vscale x 8 x double> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -3088,8 +3088,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_uge_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3101,8 +3101,8 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_uge_fv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3136,8 +3136,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ult_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v8
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %vc = fcmp ult <vscale x 8 x double> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -3147,8 +3147,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ult_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3160,8 +3160,8 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ult_fv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3195,8 +3195,8 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ule_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v16, v8
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmflt.vv v0, v16, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %vc = fcmp ule <vscale x 8 x double> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -3206,8 +3206,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ule_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3219,8 +3219,8 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ule_fv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3310,9 +3310,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_uno_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vv v24, v16, v16
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vv v0, v16, v16
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %vc = fcmp uno <vscale x 8 x double> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -3323,9 +3323,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfne.vf v24, v16, fa0
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vf v0, v16, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3338,9 +3338,9 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfne.vf v24, v16, fa0
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v24, v16
+; CHECK-NEXT:    vmfne.vf v0, v16, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3352,9 +3352,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f64_nonans(<vscale x 8 x double> %va,
 ; CHECK-LABEL: fcmp_uno_vv_nxv8f64_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vv v24, v16, v16
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vv v0, v16, v16
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %vc = fcmp uno <vscale x 8 x double> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -3365,9 +3365,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f64_nonans(<vscale x 8 x double> %va,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfne.vf v24, v16, fa0
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vf v0, v16, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3382,52 +3382,52 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) {
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    fcvt.d.w fa5, zero
 ; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; RV32-NEXT:    vmfeq.vf v24, v16, fa5
+; RV32-NEXT:    vmfeq.vf v16, v16, fa5
 ; RV32-NEXT:    vmfeq.vf v0, v8, fa5
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    srli a0, a0, 3
 ; RV32-NEXT:    add a1, a0, a0
 ; RV32-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; RV32-NEXT:    vslideup.vx v0, v24, a0
+; RV32-NEXT:    vslideup.vx v0, v16, a0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: fcmp_oeq_vf_nx16f64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    fmv.d.x fa5, zero
 ; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; RV64-NEXT:    vmfeq.vf v24, v16, fa5
+; RV64-NEXT:    vmfeq.vf v16, v16, fa5
 ; RV64-NEXT:    vmfeq.vf v0, v8, fa5
 ; RV64-NEXT:    csrr a0, vlenb
 ; RV64-NEXT:    srli a0, a0, 3
 ; RV64-NEXT:    add a1, a0, a0
 ; RV64-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; RV64-NEXT:    vslideup.vx v0, v24, a0
+; RV64-NEXT:    vslideup.vx v0, v16, a0
 ; RV64-NEXT:    ret
 ;
 ; ZVFHMIN32-LABEL: fcmp_oeq_vf_nx16f64:
 ; ZVFHMIN32:       # %bb.0:
 ; ZVFHMIN32-NEXT:    fcvt.d.w fa5, zero
 ; ZVFHMIN32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; ZVFHMIN32-NEXT:    vmfeq.vf v24, v16, fa5
+; ZVFHMIN32-NEXT:    vmfeq.vf v16, v16, fa5
 ; ZVFHMIN32-NEXT:    vmfeq.vf v0, v8, fa5
 ; ZVFHMIN32-NEXT:    csrr a0, vlenb
 ; ZVFHMIN32-NEXT:    srli a0, a0, 3
 ; ZVFHMIN32-NEXT:    add a1, a0, a0
 ; ZVFHMIN32-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; ZVFHMIN32-NEXT:    vslideup.vx v0, v24, a0
+; ZVFHMIN32-NEXT:    vslideup.vx v0, v16, a0
 ; ZVFHMIN32-NEXT:    ret
 ;
 ; ZVFHMIN64-LABEL: fcmp_oeq_vf_nx16f64:
 ; ZVFHMIN64:       # %bb.0:
 ; ZVFHMIN64-NEXT:    fmv.d.x fa5, zero
 ; ZVFHMIN64-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; ZVFHMIN64-NEXT:    vmfeq.vf v24, v16, fa5
+; ZVFHMIN64-NEXT:    vmfeq.vf v16, v16, fa5
 ; ZVFHMIN64-NEXT:    vmfeq.vf v0, v8, fa5
 ; ZVFHMIN64-NEXT:    csrr a0, vlenb
 ; ZVFHMIN64-NEXT:    srli a0, a0, 3
 ; ZVFHMIN64-NEXT:    add a1, a0, a0
 ; ZVFHMIN64-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; ZVFHMIN64-NEXT:    vslideup.vx v0, v24, a0
+; ZVFHMIN64-NEXT:    vslideup.vx v0, v16, a0
 ; ZVFHMIN64-NEXT:    ret
   %vc = fcmp oeq <vscale x 16 x double> %va, zeroinitializer
   ret <vscale x 16 x i1> %vc
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
index 7fd77c050b2957..8f6b8d23aece4f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
@@ -1089,22 +1089,17 @@ define <vscale x 128 x i1> @icmp_eq_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT:    vmv1r.v v24, v0
-; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT:    vmv1r.v v7, v0
+; CHECK-NEXT:    addi a1, sp, 16
 ; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    add a4, a0, a1
-; CHECK-NEXT:    vl8r.v v8, (a4)
-; CHECK-NEXT:    vl8r.v v0, (a0)
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v0, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vl8r.v v24, (a4)
+; CHECK-NEXT:    vl8r.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
 ; CHECK-NEXT:    sub a0, a3, a1
 ; CHECK-NEXT:    vlm.v v0, (a2)
@@ -1112,25 +1107,19 @@ define <vscale x 128 x i1> @icmp_eq_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    and a0, a2, a0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
-; CHECK-NEXT:    vmseq.vv v7, v16, v8, v0.t
+; CHECK-NEXT:    vmseq.vv v16, v16, v24, v0.t
 ; CHECK-NEXT:    bltu a3, a1, .LBB96_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a3, a1
 ; CHECK-NEXT:  .LBB96_2:
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmseq.vv v16, v8, v24, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmv1r.v v8, v7
+; CHECK-NEXT:    vmseq.vv v0, v24, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v16
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
@@ -1151,16 +1140,15 @@ define <vscale x 128 x i1> @icmp_eq_vx_nxv128i8(<vscale x 128 x i8> %va, i8 %b,
 ; CHECK-NEXT:    addi a4, a4, -1
 ; CHECK-NEXT:    and a3, a4, a3
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v25, v16, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v16, v16, a0, v0.t
 ; CHECK-NEXT:    bltu a2, a1, .LBB97_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a2, a1
 ; CHECK-NEXT:  .LBB97_2:
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmseq.vx v16, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i8 0
   %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
@@ -1181,16 +1169,15 @@ define <vscale x 128 x i1> @icmp_eq_vx_swap_nxv128i8(<vscale x 128 x i8> %va, i8
 ; CHECK-NEXT:    addi a4, a4, -1
 ; CHECK-NEXT:    and a3, a4, a3
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v25, v16, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v16, v16, a0, v0.t
 ; CHECK-NEXT:    bltu a2, a1, .LBB98_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a2, a1
 ; CHECK-NEXT:  .LBB98_2:
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmseq.vx v16, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i8 0
   %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
@@ -1698,8 +1685,7 @@ define <vscale x 8 x i1> @icmp_eq_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8
 ; CHECK-LABEL: icmp_eq_vv_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmseq.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmseq.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"eq", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1709,8 +1695,7 @@ define <vscale x 8 x i1> @icmp_eq_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vs
 ; CHECK-LABEL: icmp_eq_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmseq.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -1722,8 +1707,7 @@ define <vscale x 8 x i1> @icmp_eq_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %b
 ; CHECK-LABEL: icmp_eq_vx_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmseq.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -1735,8 +1719,7 @@ define <vscale x 8 x i1> @icmp_eq_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8
 ; CHECK-LABEL: icmp_eq_vi_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmseq.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmseq.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"eq", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1746,8 +1729,7 @@ define <vscale x 8 x i1> @icmp_eq_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vscal
 ; CHECK-LABEL: icmp_eq_vi_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmseq.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmseq.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"eq", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1757,8 +1739,7 @@ define <vscale x 8 x i1> @icmp_ne_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8
 ; CHECK-LABEL: icmp_ne_vv_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsne.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsne.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"ne", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1768,8 +1749,7 @@ define <vscale x 8 x i1> @icmp_ne_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vs
 ; CHECK-LABEL: icmp_ne_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsne.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsne.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -1781,8 +1761,7 @@ define <vscale x 8 x i1> @icmp_ne_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %b
 ; CHECK-LABEL: icmp_ne_vx_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsne.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsne.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -1794,8 +1773,7 @@ define <vscale x 8 x i1> @icmp_ne_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8
 ; CHECK-LABEL: icmp_ne_vi_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsne.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsne.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"ne", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1805,8 +1783,7 @@ define <vscale x 8 x i1> @icmp_ne_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vscal
 ; CHECK-LABEL: icmp_ne_vi_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsne.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsne.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"ne", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1816,8 +1793,7 @@ define <vscale x 8 x i1> @icmp_ugt_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_ugt_vv_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsltu.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1827,8 +1803,7 @@ define <vscale x 8 x i1> @icmp_ugt_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v
 ; CHECK-LABEL: icmp_ugt_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsgtu.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgtu.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -1840,8 +1815,7 @@ define <vscale x 8 x i1> @icmp_ugt_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %
 ; CHECK-LABEL: icmp_ugt_vx_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsltu.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -1853,8 +1827,7 @@ define <vscale x 8 x i1> @icmp_ugt_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_ugt_vi_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1864,8 +1837,7 @@ define <vscale x 8 x i1> @icmp_ugt_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca
 ; CHECK-LABEL: icmp_ugt_vi_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsleu.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsleu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1875,8 +1847,7 @@ define <vscale x 8 x i1> @icmp_uge_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_uge_vv_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsleu.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsleu.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1886,10 +1857,9 @@ define <vscale x 8 x i1> @icmp_uge_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v
 ; CHECK-LABEL: icmp_uge_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vmv.v.x v12, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsleu.vv v12, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsleu.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -1901,8 +1871,7 @@ define <vscale x 8 x i1> @icmp_uge_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %
 ; CHECK-LABEL: icmp_uge_vx_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsleu.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsleu.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -1914,8 +1883,7 @@ define <vscale x 8 x i1> @icmp_uge_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_uge_vi_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"uge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1925,8 +1893,7 @@ define <vscale x 8 x i1> @icmp_uge_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca
 ; CHECK-LABEL: icmp_uge_vi_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsleu.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsleu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1936,8 +1903,7 @@ define <vscale x 8 x i1> @icmp_ult_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_ult_vv_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsltu.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1947,8 +1913,7 @@ define <vscale x 8 x i1> @icmp_ult_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v
 ; CHECK-LABEL: icmp_ult_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsltu.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -1960,8 +1925,7 @@ define <vscale x 8 x i1> @icmp_ult_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %
 ; CHECK-LABEL: icmp_ult_vx_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsgtu.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgtu.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -1973,8 +1937,7 @@ define <vscale x 8 x i1> @icmp_ult_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_ult_vi_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsleu.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsleu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"ult", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1984,8 +1947,7 @@ define <vscale x 8 x i1> @icmp_ult_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca
 ; CHECK-LABEL: icmp_ult_vi_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1995,8 +1957,7 @@ define <vscale x 8 x i1> @icmp_sgt_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_sgt_vv_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmslt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmslt.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2006,8 +1967,7 @@ define <vscale x 8 x i1> @icmp_sgt_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v
 ; CHECK-LABEL: icmp_sgt_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsgt.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgt.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -2019,8 +1979,7 @@ define <vscale x 8 x i1> @icmp_sgt_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %
 ; CHECK-LABEL: icmp_sgt_vx_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmslt.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmslt.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -2032,8 +1991,7 @@ define <vscale x 8 x i1> @icmp_sgt_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_sgt_vi_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsgt.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgt.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2043,8 +2001,7 @@ define <vscale x 8 x i1> @icmp_sgt_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca
 ; CHECK-LABEL: icmp_sgt_vi_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsle.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2054,8 +2011,7 @@ define <vscale x 8 x i1> @icmp_sge_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_sge_vv_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsle.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"sge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2065,10 +2021,9 @@ define <vscale x 8 x i1> @icmp_sge_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v
 ; CHECK-LABEL: icmp_sge_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vmv.v.x v12, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsle.vv v12, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -2080,8 +2035,7 @@ define <vscale x 8 x i1> @icmp_sge_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %
 ; CHECK-LABEL: icmp_sge_vx_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsle.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -2093,8 +2047,7 @@ define <vscale x 8 x i1> @icmp_sge_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_sge_vi_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsgt.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgt.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"sge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2104,8 +2057,7 @@ define <vscale x 8 x i1> @icmp_sge_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca
 ; CHECK-LABEL: icmp_sge_vi_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsle.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"sge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2115,8 +2067,7 @@ define <vscale x 8 x i1> @icmp_slt_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_slt_vv_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmslt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmslt.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"slt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2126,8 +2077,7 @@ define <vscale x 8 x i1> @icmp_slt_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v
 ; CHECK-LABEL: icmp_slt_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmslt.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmslt.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -2139,8 +2089,7 @@ define <vscale x 8 x i1> @icmp_slt_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %
 ; CHECK-LABEL: icmp_slt_vx_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsgt.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgt.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -2152,8 +2101,7 @@ define <vscale x 8 x i1> @icmp_slt_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_slt_vi_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsle.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"slt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2163,8 +2111,7 @@ define <vscale x 8 x i1> @icmp_slt_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca
 ; CHECK-LABEL: icmp_slt_vi_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsgt.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgt.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"slt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2174,8 +2121,7 @@ define <vscale x 8 x i1> @icmp_sle_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_sle_vv_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsle.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"sle", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2185,8 +2131,7 @@ define <vscale x 8 x i1> @icmp_sle_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v
 ; CHECK-LABEL: icmp_sle_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsle.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -2198,10 +2143,9 @@ define <vscale x 8 x i1> @icmp_sle_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %
 ; CHECK-LABEL: icmp_sle_vx_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vmv.v.x v12, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsle.vv v12, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -2213,8 +2157,7 @@ define <vscale x 8 x i1> @icmp_sle_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_sle_vi_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsle.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"sle", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2224,8 +2167,7 @@ define <vscale x 8 x i1> @icmp_sle_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca
 ; CHECK-LABEL: icmp_sle_vi_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsgt.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgt.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"sle", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2239,14 +2181,20 @@ define <vscale x 32 x i1> @icmp_eq_vv_nxv32i32(<vscale x 32 x i32> %va, <vscale
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    li a3, 24
+; CHECK-NEXT:    mul a1, a1, a3
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; CHECK-NEXT:    vmv1r.v v24, v0
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
@@ -2259,12 +2207,17 @@ define <vscale x 32 x i1> @icmp_eq_vv_nxv32i32(<vscale x 32 x i32> %va, <vscale
 ; CHECK-NEXT:    addi a5, a5, -1
 ; CHECK-NEXT:    and a4, a5, a4
 ; CHECK-NEXT:    srli a1, a1, 2
-; CHECK-NEXT:    vl8re32.v v0, (a0)
+; CHECK-NEXT:    vl8re32.v v16, (a0)
 ; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v0, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vslidedown.vx v0, v24, a1
+; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vslidedown.vx v0, v0, a1
 ; CHECK-NEXT:    vsetvli zero, a4, e32, m8, ta, ma
-; CHECK-NEXT:    vmseq.vv v7, v16, v8, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmseq.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    bltu a2, a3, .LBB189_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a2, a3
@@ -2272,19 +2225,19 @@ define <vscale x 32 x i1> @icmp_eq_vv_nxv32i32(<vscale x 32 x i32> %va, <vscale
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmseq.vv v16, v8, v24, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmseq.vv v0, v24, v16, v0.t
 ; CHECK-NEXT:    add a0, a1, a1
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vx v16, v7, a1
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vslideup.vx v0, v8, a1
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    li a1, 24
+; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
@@ -2306,18 +2259,17 @@ define <vscale x 32 x i1> @icmp_eq_vx_nxv32i32(<vscale x 32 x i32> %va, i32 %b,
 ; CHECK-NEXT:    addi a5, a5, -1
 ; CHECK-NEXT:    and a4, a5, a4
 ; CHECK-NEXT:    vsetvli zero, a4, e32, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v25, v16, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v16, v16, a0, v0.t
 ; CHECK-NEXT:    bltu a1, a3, .LBB190_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a1, a3
 ; CHECK-NEXT:  .LBB190_2:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmseq.vx v16, v8, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    add a0, a2, a2
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vx v16, v25, a2
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vslideup.vx v0, v16, a2
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
@@ -2339,18 +2291,17 @@ define <vscale x 32 x i1> @icmp_eq_vx_swap_nxv32i32(<vscale x 32 x i32> %va, i32
 ; CHECK-NEXT:    addi a5, a5, -1
 ; CHECK-NEXT:    and a4, a5, a4
 ; CHECK-NEXT:    vsetvli zero, a4, e32, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v25, v16, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v16, v16, a0, v0.t
 ; CHECK-NEXT:    bltu a1, a3, .LBB191_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a1, a3
 ; CHECK-NEXT:  .LBB191_2:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmseq.vx v16, v8, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    add a0, a2, a2
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vx v16, v25, a2
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vslideup.vx v0, v16, a2
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
@@ -3110,8 +3061,7 @@ define <vscale x 8 x i1> @icmp_eq_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
 ; CHECK-LABEL: icmp_eq_vv_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmseq.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmseq.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"eq", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3126,18 +3076,16 @@ define <vscale x 8 x i1> @icmp_eq_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vs
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vmseq.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vmseq.vv v0, v8, v16, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_eq_vx_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmseq.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3154,18 +3102,16 @@ define <vscale x 8 x i1> @icmp_eq_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %b
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vmseq.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vmseq.vv v0, v16, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_eq_vx_swap_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmseq.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3177,8 +3123,7 @@ define <vscale x 8 x i1> @icmp_eq_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
 ; CHECK-LABEL: icmp_eq_vi_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmseq.vi v16, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmseq.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"eq", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3188,8 +3133,7 @@ define <vscale x 8 x i1> @icmp_eq_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vscal
 ; CHECK-LABEL: icmp_eq_vi_swap_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmseq.vi v16, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmseq.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"eq", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3199,8 +3143,7 @@ define <vscale x 8 x i1> @icmp_ne_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
 ; CHECK-LABEL: icmp_ne_vv_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsne.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmsne.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"ne", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3215,18 +3158,16 @@ define <vscale x 8 x i1> @icmp_ne_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vs
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vmsne.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vmsne.vv v0, v8, v16, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_ne_vx_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsne.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsne.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3243,18 +3184,16 @@ define <vscale x 8 x i1> @icmp_ne_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %b
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vmsne.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vmsne.vv v0, v16, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_ne_vx_swap_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsne.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsne.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3266,8 +3205,7 @@ define <vscale x 8 x i1> @icmp_ne_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
 ; CHECK-LABEL: icmp_ne_vi_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsne.vi v16, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsne.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"ne", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3277,8 +3215,7 @@ define <vscale x 8 x i1> @icmp_ne_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vscal
 ; CHECK-LABEL: icmp_ne_vi_swap_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsne.vi v16, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsne.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"ne", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3288,8 +3225,7 @@ define <vscale x 8 x i1> @icmp_ugt_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_ugt_vv_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsltu.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmsltu.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3304,18 +3240,16 @@ define <vscale x 8 x i1> @icmp_ugt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vmsltu.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vmsltu.vv v0, v16, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_ugt_vx_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsgtu.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsgtu.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3332,18 +3266,16 @@ define <vscale x 8 x i1> @icmp_ugt_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vmsltu.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vmsltu.vv v0, v8, v16, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_ugt_vx_swap_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsltu.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsltu.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3355,8 +3287,7 @@ define <vscale x 8 x i1> @icmp_ugt_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_ugt_vi_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v16, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3366,8 +3297,7 @@ define <vscale x 8 x i1> @icmp_ugt_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca
 ; CHECK-LABEL: icmp_ugt_vi_swap_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsleu.vi v16, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsleu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3377,8 +3307,7 @@ define <vscale x 8 x i1> @icmp_uge_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_uge_vv_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsleu.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmsleu.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3393,20 +3322,18 @@ define <vscale x 8 x i1> @icmp_uge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vmsleu.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vmsleu.vv v0, v16, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_uge_vx_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV64-NEXT:    vmv.v.x v24, a0
+; RV64-NEXT:    vmv.v.x v16, a0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsleu.vv v16, v24, v8, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsleu.vv v0, v16, v8, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3423,18 +3350,16 @@ define <vscale x 8 x i1> @icmp_uge_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vmsleu.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vmsleu.vv v0, v8, v16, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_uge_vx_swap_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsleu.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsleu.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3446,8 +3371,7 @@ define <vscale x 8 x i1> @icmp_uge_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_uge_vi_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v16, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"uge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3457,8 +3381,7 @@ define <vscale x 8 x i1> @icmp_uge_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca
 ; CHECK-LABEL: icmp_uge_vi_swap_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsleu.vi v16, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsleu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3468,8 +3391,7 @@ define <vscale x 8 x i1> @icmp_ult_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_ult_vv_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsltu.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmsltu.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3484,18 +3406,16 @@ define <vscale x 8 x i1> @icmp_ult_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vmsltu.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vmsltu.vv v0, v8, v16, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_ult_vx_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsltu.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsltu.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3512,18 +3432,16 @@ define <vscale x 8 x i1> @icmp_ult_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vmsltu.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vmsltu.vv v0, v16, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_ult_vx_swap_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsgtu.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsgtu.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3535,8 +3453,7 @@ define <vscale x 8 x i1> @icmp_ult_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_ult_vi_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsleu.vi v16, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsleu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"ult", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3546,8 +3463,7 @@ define <vscale x 8 x i1> @icmp_ult_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca
 ; CHECK-LABEL: icmp_ult_vi_swap_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v16, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3557,8 +3473,7 @@ define <vscale x 8 x i1> @icmp_sgt_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_sgt_vv_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmslt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmslt.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3573,18 +3488,16 @@ define <vscale x 8 x i1> @icmp_sgt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vmslt.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vmslt.vv v0, v16, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_sgt_vx_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsgt.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsgt.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3601,18 +3514,16 @@ define <vscale x 8 x i1> @icmp_sgt_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vmslt.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vmslt.vv v0, v8, v16, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_sgt_vx_swap_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmslt.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmslt.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3624,8 +3535,7 @@ define <vscale x 8 x i1> @icmp_sgt_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_sgt_vi_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsgt.vi v16, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsgt.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3635,8 +3545,7 @@ define <vscale x 8 x i1> @icmp_sgt_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca
 ; CHECK-LABEL: icmp_sgt_vi_swap_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsle.vi v16, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsle.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3646,8 +3555,7 @@ define <vscale x 8 x i1> @icmp_sge_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_sge_vv_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsle.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmsle.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"sge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3662,20 +3570,18 @@ define <vscale x 8 x i1> @icmp_sge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vmsle.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vmsle.vv v0, v16, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_sge_vx_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV64-NEXT:    vmv.v.x v24, a0
+; RV64-NEXT:    vmv.v.x v16, a0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsle.vv v16, v24, v8, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsle.vv v0, v16, v8, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3692,18 +3598,16 @@ define <vscale x 8 x i1> @icmp_sge_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vmsle.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vmsle.vv v0, v8, v16, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_sge_vx_swap_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsle.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsle.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3715,8 +3619,7 @@ define <vscale x 8 x i1> @icmp_sge_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_sge_vi_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsgt.vi v16, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsgt.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"sge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3726,8 +3629,7 @@ define <vscale x 8 x i1> @icmp_sge_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca
 ; CHECK-LABEL: icmp_sge_vi_swap_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsle.vi v16, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsle.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"sge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3737,8 +3639,7 @@ define <vscale x 8 x i1> @icmp_slt_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_slt_vv_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmslt.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmslt.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"slt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3753,18 +3654,16 @@ define <vscale x 8 x i1> @icmp_slt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vmslt.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vmslt.vv v0, v8, v16, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_slt_vx_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmslt.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmslt.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3781,18 +3680,16 @@ define <vscale x 8 x i1> @icmp_slt_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vmslt.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vmslt.vv v0, v16, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_slt_vx_swap_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsgt.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsgt.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3804,8 +3701,7 @@ define <vscale x 8 x i1> @icmp_slt_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_slt_vi_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsle.vi v16, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsle.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"slt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3815,8 +3711,7 @@ define <vscale x 8 x i1> @icmp_slt_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca
 ; CHECK-LABEL: icmp_slt_vi_swap_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsgt.vi v16, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsgt.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"slt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3826,8 +3721,7 @@ define <vscale x 8 x i1> @icmp_sle_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_sle_vv_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsle.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmsle.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"sle", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3842,18 +3736,16 @@ define <vscale x 8 x i1> @icmp_sle_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vmsle.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vmsle.vv v0, v8, v16, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_sle_vx_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsle.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsle.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3870,20 +3762,18 @@ define <vscale x 8 x i1> @icmp_sle_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vmsle.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vmsle.vv v0, v16, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: icmp_sle_vx_swap_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV64-NEXT:    vmv.v.x v24, a0
+; RV64-NEXT:    vmv.v.x v16, a0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsle.vv v16, v24, v8, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsle.vv v0, v16, v8, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3895,8 +3785,7 @@ define <vscale x 8 x i1> @icmp_sle_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_sle_vi_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsle.vi v16, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsle.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"sle", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3906,8 +3795,7 @@ define <vscale x 8 x i1> @icmp_sle_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca
 ; CHECK-LABEL: icmp_sle_vi_swap_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsgt.vi v16, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsgt.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"sle", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll
index 90ffeff9689e07..46de7bdcf39b44 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll
@@ -2981,10 +2981,10 @@ define <vscale x 16 x i1> @icmp_eq_vi_nx16i64(<vscale x 16 x i64> %va) {
 ; CHECK-NEXT:    srli a0, a0, 3
 ; CHECK-NEXT:    add a1, a0, a0
 ; CHECK-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmseq.vi v24, v16, 0
+; CHECK-NEXT:    vmseq.vi v16, v16, 0
 ; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vslideup.vx v0, v24, a0
+; CHECK-NEXT:    vslideup.vx v0, v16, a0
 ; CHECK-NEXT:    ret
   %vc = icmp eq <vscale x 16 x i64> %va, zeroinitializer
   ret <vscale x 16 x i1> %vc
diff --git a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll
index b7fe722958bfb8..39f99f417fca93 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll
@@ -96,12 +96,12 @@ define <vscale x 2 x i64> @vec_nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64>
 ; CHECK-NEXT:    li a0, -1
 ; CHECK-NEXT:    srli a1, a0, 1
 ; CHECK-NEXT:    vsll.vv v12, v8, v10
-; CHECK-NEXT:    vsra.vv v14, v12, v10
-; CHECK-NEXT:    vmsne.vv v10, v8, v14
+; CHECK-NEXT:    vsra.vv v10, v12, v10
+; CHECK-NEXT:    vmsne.vv v2, v8, v10
 ; CHECK-NEXT:    vmv.v.x v8, a1
 ; CHECK-NEXT:    slli a0, a0, 63
 ; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    ret
   %tmp = call <vscale x 2 x i64> @llvm.sshl.sat.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y)
@@ -116,11 +116,11 @@ define <vscale x 4 x i32> @vec_nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32>
 ; CHECK-NEXT:    lui a0, 524288
 ; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsll.vv v12, v8, v10
-; CHECK-NEXT:    vsra.vv v14, v12, v10
-; CHECK-NEXT:    vmsne.vv v10, v8, v14
+; CHECK-NEXT:    vsra.vv v10, v12, v10
+; CHECK-NEXT:    vmsne.vv v2, v8, v10
 ; CHECK-NEXT:    vmv.v.x v8, a1
 ; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    ret
   %tmp = call <vscale x 4 x i32> @llvm.sshl.sat.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y)
@@ -135,11 +135,11 @@ define <vscale x 8 x i16> @vec_nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16>
 ; CHECK-NEXT:    lui a0, 8
 ; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsll.vv v12, v8, v10
-; CHECK-NEXT:    vsra.vv v14, v12, v10
-; CHECK-NEXT:    vmsne.vv v10, v8, v14
+; CHECK-NEXT:    vsra.vv v10, v12, v10
+; CHECK-NEXT:    vmsne.vv v2, v8, v10
 ; CHECK-NEXT:    vmv.v.x v8, a1
 ; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    ret
   %tmp = call <vscale x 8 x i16> @llvm.sshl.sat.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y)
@@ -153,12 +153,12 @@ define <vscale x 16 x i8> @vec_nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8>
 ; CHECK-NEXT:    vmsle.vi v0, v8, -1
 ; CHECK-NEXT:    li a0, 127
 ; CHECK-NEXT:    vsll.vv v12, v8, v10
-; CHECK-NEXT:    vsra.vv v14, v12, v10
-; CHECK-NEXT:    vmsne.vv v10, v8, v14
+; CHECK-NEXT:    vsra.vv v10, v12, v10
+; CHECK-NEXT:    vmsne.vv v2, v8, v10
 ; CHECK-NEXT:    vmv.v.x v8, a0
 ; CHECK-NEXT:    li a0, 128
 ; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    ret
   %tmp = call <vscale x 16 x i8> @llvm.sshl.sat.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
index a320aecc6fce49..4332bf36660aac 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
@@ -20,8 +20,8 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>} @vector_deinterleave_load_nxv16i
 ; CHECK-NEXT:    vmerge.vim v12, v10, 1, v0
 ; CHECK-NEXT:    vnsrl.wi v8, v12, 0
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
-; CHECK-NEXT:    vnsrl.wi v10, v12, 8
-; CHECK-NEXT:    vmsne.vi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v12, 8
+; CHECK-NEXT:    vmsne.vi v8, v8, 0
 ; CHECK-NEXT:    ret
   %vec = load <vscale x 32 x i1>, ptr %p
   %retval = call {<vscale x 16 x i1>, <vscale x 16 x i1>} @llvm.experimental.vector.deinterleave2.nxv32i1(<vscale x 32 x i1> %vec)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
index 9a5e86d61c265e..b55cdb3afedb95 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
@@ -17,15 +17,15 @@ define void @vector_interleave_store_nxv32i1_nxv16i1(<vscale x 16 x i1> %a, <vsc
 ; CHECK-NEXT:    vwaddu.vv v16, v8, v12
 ; CHECK-NEXT:    li a1, -1
 ; CHECK-NEXT:    vwmaccu.vx v16, a1, v12
-; CHECK-NEXT:    vmsne.vi v8, v18, 0
-; CHECK-NEXT:    vmsne.vi v9, v16, 0
+; CHECK-NEXT:    vmsne.vi v0, v18, 0
+; CHECK-NEXT:    vmsne.vi v2, v16, 0
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    srli a1, a1, 2
 ; CHECK-NEXT:    add a2, a1, a1
 ; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vx v9, v8, a1
+; CHECK-NEXT:    vslideup.vx v2, v0, a1
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vsm.v v9, (a0)
+; CHECK-NEXT:    vsm.v v2, (a0)
 ; CHECK-NEXT:    ret
   %res = call <vscale x 32 x i1> @llvm.experimental.vector.interleave2.nxv32i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
   store <vscale x 32 x i1> %res, ptr %p
@@ -98,43 +98,34 @@ define void @vector_interleave_store_nxv16i64_nxv8i64(<vscale x 8 x i64> %a, <vs
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT:    vmv8r.v v0, v8
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    srli a2, a1, 1
 ; CHECK-NEXT:    vsetvli a3, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vid.v v24
 ; CHECK-NEXT:    vand.vi v26, v24, 1
-; CHECK-NEXT:    vmsne.vi v28, v26, 0
+; CHECK-NEXT:    vmsne.vi v0, v26, 0
 ; CHECK-NEXT:    vsrl.vi v24, v24, 1
-; CHECK-NEXT:    vmv1r.v v0, v28
+; CHECK-NEXT:    vmv4r.v v28, v4
 ; CHECK-NEXT:    vadd.vx v24, v24, a2, v0.t
 ; CHECK-NEXT:    vmv4r.v v12, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vrgatherei16.vv v0, v8, v24
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v0, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    addi a2, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmv4r.v v16, v12
+; CHECK-NEXT:    vs8r.v v0, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    vmv4r.v v16, v28
 ; CHECK-NEXT:    vrgatherei16.vv v8, v16, v24
 ; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    add a1, a0, a1
 ; CHECK-NEXT:    vs8r.v v8, (a1)
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    addi a1, sp, 16
 ; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vs8r.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
index 4b6ad0f27214d0..4b456d8b531dff 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -19,13 +19,13 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1>
 ; CHECK-NEXT:    vwaddu.vv v16, v8, v12
 ; CHECK-NEXT:    li a0, -1
 ; CHECK-NEXT:    vwmaccu.vx v16, a0, v12
-; CHECK-NEXT:    vmsne.vi v8, v18, 0
+; CHECK-NEXT:    vmsne.vi v2, v18, 0
 ; CHECK-NEXT:    vmsne.vi v0, v16, 0
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    srli a0, a0, 2
 ; CHECK-NEXT:    add a1, a0, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vx v0, v8, a0
+; CHECK-NEXT:    vslideup.vx v0, v2, a0
 ; CHECK-NEXT:    ret
 ;
 ; ZVBB-LABEL: vector_interleave_nxv32i1_nxv16i1:
@@ -39,13 +39,13 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1>
 ; ZVBB-NEXT:    li a0, 1
 ; ZVBB-NEXT:    vmv1r.v v0, v9
 ; ZVBB-NEXT:    vwaddu.wx v12, v12, a0, v0.t
-; ZVBB-NEXT:    vmsne.vi v8, v14, 0
+; ZVBB-NEXT:    vmsne.vi v2, v14, 0
 ; ZVBB-NEXT:    vmsne.vi v0, v12, 0
 ; ZVBB-NEXT:    csrr a0, vlenb
 ; ZVBB-NEXT:    srli a0, a0, 2
 ; ZVBB-NEXT:    add a1, a0, a0
 ; ZVBB-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; ZVBB-NEXT:    vslideup.vx v0, v8, a0
+; ZVBB-NEXT:    vslideup.vx v0, v2, a0
 ; ZVBB-NEXT:    ret
   %res = call <vscale x 32 x i1> @llvm.experimental.vector.interleave2.nxv32i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
   ret <vscale x 32 x i1> %res
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcmp-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcmp-constrained-sdnode.ll
index 1cc9ea029d4574..45e9854045138a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcmp-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcmp-constrained-sdnode.ll
@@ -1942,12 +1942,10 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-LABEL: fcmp_ogt_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -1958,12 +1956,10 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1976,12 +1972,10 @@ define <vscale x 8 x i1> @fcmp_ogt_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1993,12 +1987,10 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-LABEL: fcmp_oge_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2009,12 +2001,10 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2027,12 +2017,10 @@ define <vscale x 8 x i1> @fcmp_oge_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2044,12 +2032,10 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-LABEL: fcmp_olt_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2060,12 +2046,10 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2078,12 +2062,10 @@ define <vscale x 8 x i1> @fcmp_olt_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2095,12 +2077,10 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-LABEL: fcmp_ole_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2111,12 +2091,10 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2129,12 +2107,10 @@ define <vscale x 8 x i1> @fcmp_ole_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2146,14 +2122,13 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-LABEL: fcmp_one_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vv v2, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2164,14 +2139,13 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2184,14 +2158,13 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2203,9 +2176,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-LABEL: fcmp_ord_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2216,9 +2189,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2231,9 +2204,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2245,14 +2218,13 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-LABEL: fcmp_ueq_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vv v2, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2263,14 +2235,13 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2283,14 +2254,13 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2302,12 +2272,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-LABEL: fcmp_ugt_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2318,12 +2287,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2336,12 +2304,11 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2353,12 +2320,11 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-LABEL: fcmp_uge_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2369,12 +2335,11 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2387,12 +2352,11 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2404,12 +2368,11 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-LABEL: fcmp_ult_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2420,12 +2383,11 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2438,12 +2400,11 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2455,12 +2416,11 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-LABEL: fcmp_ule_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2471,12 +2431,11 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2489,12 +2448,11 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2540,9 +2498,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-LABEL: fcmp_uno_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v12, v10, v10
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vv v0, v10, v10
+; CHECK-NEXT:    vmfne.vv v2, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2553,9 +2511,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vf v0, v10, fa0
+; CHECK-NEXT:    vmfne.vv v2, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2568,9 +2526,9 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v10
+; CHECK-NEXT:    vmfne.vf v0, v10, fa0
+; CHECK-NEXT:    vmfne.vv v2, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2617,12 +2575,10 @@ define <vscale x 16 x i1> @fcmp_ogt_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_ogt_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2633,12 +2589,10 @@ define <vscale x 16 x i1> @fcmp_ogt_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2651,12 +2605,10 @@ define <vscale x 16 x i1> @fcmp_ogt_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2668,12 +2620,10 @@ define <vscale x 16 x i1> @fcmp_oge_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_oge_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2684,12 +2634,10 @@ define <vscale x 16 x i1> @fcmp_oge_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2702,12 +2650,10 @@ define <vscale x 16 x i1> @fcmp_oge_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2719,12 +2665,10 @@ define <vscale x 16 x i1> @fcmp_olt_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_olt_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2735,12 +2679,10 @@ define <vscale x 16 x i1> @fcmp_olt_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2753,12 +2695,10 @@ define <vscale x 16 x i1> @fcmp_olt_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2770,12 +2710,10 @@ define <vscale x 16 x i1> @fcmp_ole_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_ole_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2786,12 +2724,10 @@ define <vscale x 16 x i1> @fcmp_ole_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2804,12 +2740,10 @@ define <vscale x 16 x i1> @fcmp_ole_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2821,14 +2755,13 @@ define <vscale x 16 x i1> @fcmp_one_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_one_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vv v4, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2839,14 +2772,13 @@ define <vscale x 16 x i1> @fcmp_one_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2859,14 +2791,13 @@ define <vscale x 16 x i1> @fcmp_one_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2878,9 +2809,9 @@ define <vscale x 16 x i1> @fcmp_ord_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_ord_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2891,9 +2822,9 @@ define <vscale x 16 x i1> @fcmp_ord_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2906,9 +2837,9 @@ define <vscale x 16 x i1> @fcmp_ord_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2920,14 +2851,13 @@ define <vscale x 16 x i1> @fcmp_ueq_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_ueq_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vv v4, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2938,14 +2868,13 @@ define <vscale x 16 x i1> @fcmp_ueq_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2958,14 +2887,13 @@ define <vscale x 16 x i1> @fcmp_ueq_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2977,12 +2905,11 @@ define <vscale x 16 x i1> @fcmp_ugt_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_ugt_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2993,12 +2920,11 @@ define <vscale x 16 x i1> @fcmp_ugt_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -3011,12 +2937,11 @@ define <vscale x 16 x i1> @fcmp_ugt_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -3028,12 +2953,11 @@ define <vscale x 16 x i1> @fcmp_uge_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_uge_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -3044,12 +2968,11 @@ define <vscale x 16 x i1> @fcmp_uge_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -3062,12 +2985,11 @@ define <vscale x 16 x i1> @fcmp_uge_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -3079,12 +3001,11 @@ define <vscale x 16 x i1> @fcmp_ult_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_ult_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -3095,12 +3016,11 @@ define <vscale x 16 x i1> @fcmp_ult_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -3113,12 +3033,11 @@ define <vscale x 16 x i1> @fcmp_ult_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -3130,12 +3049,11 @@ define <vscale x 16 x i1> @fcmp_ule_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_ule_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -3146,12 +3064,11 @@ define <vscale x 16 x i1> @fcmp_ule_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -3164,12 +3081,11 @@ define <vscale x 16 x i1> @fcmp_ule_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -3215,9 +3131,9 @@ define <vscale x 16 x i1> @fcmp_uno_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_uno_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v16, v12, v12
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vv v0, v12, v12
+; CHECK-NEXT:    vmfne.vv v4, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -3228,9 +3144,9 @@ define <vscale x 16 x i1> @fcmp_uno_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vf v0, v12, fa0
+; CHECK-NEXT:    vmfne.vv v4, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -3243,9 +3159,9 @@ define <vscale x 16 x i1> @fcmp_uno_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v12
+; CHECK-NEXT:    vmfne.vf v0, v12, fa0
+; CHECK-NEXT:    vmfne.vv v4, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -3292,12 +3208,10 @@ define <vscale x 32 x i1> @fcmp_ogt_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_ogt_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v8, v8
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3308,12 +3222,10 @@ define <vscale x 32 x i1> @fcmp_ogt_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3326,12 +3238,10 @@ define <vscale x 32 x i1> @fcmp_ogt_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3343,12 +3253,10 @@ define <vscale x 32 x i1> @fcmp_oge_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_oge_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v8, v8
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmfle.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3359,12 +3267,10 @@ define <vscale x 32 x i1> @fcmp_oge_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3377,12 +3283,10 @@ define <vscale x 32 x i1> @fcmp_oge_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3394,12 +3298,10 @@ define <vscale x 32 x i1> @fcmp_olt_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_olt_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3410,12 +3312,10 @@ define <vscale x 32 x i1> @fcmp_olt_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3428,12 +3328,10 @@ define <vscale x 32 x i1> @fcmp_olt_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3445,12 +3343,10 @@ define <vscale x 32 x i1> @fcmp_ole_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_ole_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3461,12 +3357,10 @@ define <vscale x 32 x i1> @fcmp_ole_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3479,12 +3373,10 @@ define <vscale x 32 x i1> @fcmp_ole_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3496,14 +3388,13 @@ define <vscale x 32 x i1> @fcmp_one_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_one_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v25, v8, v16, v0.t
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v24, v25
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3514,14 +3405,13 @@ define <vscale x 32 x i1> @fcmp_one_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3534,14 +3424,13 @@ define <vscale x 32 x i1> @fcmp_one_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3553,9 +3442,9 @@ define <vscale x 32 x i1> @fcmp_ord_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_ord_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3566,9 +3455,9 @@ define <vscale x 32 x i1> @fcmp_ord_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3581,9 +3470,9 @@ define <vscale x 32 x i1> @fcmp_ord_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v24, v16
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3595,14 +3484,13 @@ define <vscale x 32 x i1> @fcmp_ueq_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_ueq_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v25, v8, v16, v0.t
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v24, v25
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3613,14 +3501,13 @@ define <vscale x 32 x i1> @fcmp_ueq_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3633,14 +3520,13 @@ define <vscale x 32 x i1> @fcmp_ueq_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3652,12 +3538,11 @@ define <vscale x 32 x i1> @fcmp_ugt_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_ugt_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v16, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3668,12 +3553,11 @@ define <vscale x 32 x i1> @fcmp_ugt_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3686,12 +3570,11 @@ define <vscale x 32 x i1> @fcmp_ugt_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3703,12 +3586,11 @@ define <vscale x 32 x i1> @fcmp_uge_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_uge_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v16, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3719,12 +3601,11 @@ define <vscale x 32 x i1> @fcmp_uge_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3737,12 +3618,11 @@ define <vscale x 32 x i1> @fcmp_uge_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3754,12 +3634,11 @@ define <vscale x 32 x i1> @fcmp_ult_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_ult_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v8, v8
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmfle.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3770,12 +3649,11 @@ define <vscale x 32 x i1> @fcmp_ult_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3788,12 +3666,11 @@ define <vscale x 32 x i1> @fcmp_ult_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3805,12 +3682,11 @@ define <vscale x 32 x i1> @fcmp_ule_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_ule_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v8, v8
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3821,12 +3697,11 @@ define <vscale x 32 x i1> @fcmp_ule_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3839,12 +3714,11 @@ define <vscale x 32 x i1> @fcmp_ule_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3890,9 +3764,9 @@ define <vscale x 32 x i1> @fcmp_uno_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_uno_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfne.vv v24, v16, v16
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vv v0, v16, v16
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3903,9 +3777,9 @@ define <vscale x 32 x i1> @fcmp_uno_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfne.vf v24, v16, fa0
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vf v0, v16, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3918,9 +3792,9 @@ define <vscale x 32 x i1> @fcmp_uno_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfne.vf v24, v16, fa0
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v24, v16
+; CHECK-NEXT:    vmfne.vf v0, v16, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -5233,12 +5107,10 @@ define <vscale x 4 x i1> @fcmp_ogt_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ogt_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5249,12 +5121,10 @@ define <vscale x 4 x i1> @fcmp_ogt_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5267,12 +5137,10 @@ define <vscale x 4 x i1> @fcmp_ogt_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5284,12 +5152,10 @@ define <vscale x 4 x i1> @fcmp_oge_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_oge_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5300,12 +5166,10 @@ define <vscale x 4 x i1> @fcmp_oge_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5318,12 +5182,10 @@ define <vscale x 4 x i1> @fcmp_oge_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5335,12 +5197,10 @@ define <vscale x 4 x i1> @fcmp_olt_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_olt_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5351,12 +5211,10 @@ define <vscale x 4 x i1> @fcmp_olt_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5369,12 +5227,10 @@ define <vscale x 4 x i1> @fcmp_olt_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5386,12 +5242,10 @@ define <vscale x 4 x i1> @fcmp_ole_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ole_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5402,12 +5256,10 @@ define <vscale x 4 x i1> @fcmp_ole_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5420,12 +5272,10 @@ define <vscale x 4 x i1> @fcmp_ole_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5437,14 +5287,13 @@ define <vscale x 4 x i1> @fcmp_one_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_one_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vv v2, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5455,14 +5304,13 @@ define <vscale x 4 x i1> @fcmp_one_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5475,14 +5323,13 @@ define <vscale x 4 x i1> @fcmp_one_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5494,9 +5341,9 @@ define <vscale x 4 x i1> @fcmp_ord_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ord_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5507,9 +5354,9 @@ define <vscale x 4 x i1> @fcmp_ord_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5522,9 +5369,9 @@ define <vscale x 4 x i1> @fcmp_ord_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5536,14 +5383,13 @@ define <vscale x 4 x i1> @fcmp_ueq_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ueq_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vv v2, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5554,14 +5400,13 @@ define <vscale x 4 x i1> @fcmp_ueq_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5574,14 +5419,13 @@ define <vscale x 4 x i1> @fcmp_ueq_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5593,12 +5437,11 @@ define <vscale x 4 x i1> @fcmp_ugt_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ugt_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5609,12 +5452,11 @@ define <vscale x 4 x i1> @fcmp_ugt_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5627,12 +5469,11 @@ define <vscale x 4 x i1> @fcmp_ugt_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5644,12 +5485,11 @@ define <vscale x 4 x i1> @fcmp_uge_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_uge_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5660,12 +5500,11 @@ define <vscale x 4 x i1> @fcmp_uge_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5678,12 +5517,11 @@ define <vscale x 4 x i1> @fcmp_uge_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5695,12 +5533,11 @@ define <vscale x 4 x i1> @fcmp_ult_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ult_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5711,12 +5548,11 @@ define <vscale x 4 x i1> @fcmp_ult_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5729,12 +5565,11 @@ define <vscale x 4 x i1> @fcmp_ult_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5746,12 +5581,11 @@ define <vscale x 4 x i1> @fcmp_ule_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ule_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5762,12 +5596,11 @@ define <vscale x 4 x i1> @fcmp_ule_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5780,12 +5613,11 @@ define <vscale x 4 x i1> @fcmp_ule_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5831,9 +5663,9 @@ define <vscale x 4 x i1> @fcmp_uno_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_uno_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v12, v10, v10
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vv v0, v10, v10
+; CHECK-NEXT:    vmfne.vv v2, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5844,9 +5676,9 @@ define <vscale x 4 x i1> @fcmp_uno_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vf v0, v10, fa0
+; CHECK-NEXT:    vmfne.vv v2, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5859,9 +5691,9 @@ define <vscale x 4 x i1> @fcmp_uno_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v10
+; CHECK-NEXT:    vmfne.vf v0, v10, fa0
+; CHECK-NEXT:    vmfne.vv v2, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5908,12 +5740,10 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ogt_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -5924,12 +5754,10 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5942,12 +5770,10 @@ define <vscale x 8 x i1> @fcmp_ogt_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5959,12 +5785,10 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_oge_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -5975,12 +5799,10 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5993,12 +5815,10 @@ define <vscale x 8 x i1> @fcmp_oge_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6010,12 +5830,10 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_olt_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -6026,12 +5844,10 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6044,12 +5860,10 @@ define <vscale x 8 x i1> @fcmp_olt_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6061,12 +5875,10 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ole_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -6077,12 +5889,10 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6095,12 +5905,10 @@ define <vscale x 8 x i1> @fcmp_ole_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6112,14 +5920,13 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_one_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vv v4, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -6130,14 +5937,13 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6150,14 +5956,13 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6169,9 +5974,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ord_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -6182,9 +5987,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6197,9 +6002,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6211,14 +6016,13 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ueq_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vv v4, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -6229,14 +6033,13 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6249,14 +6052,13 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6268,12 +6070,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ugt_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -6284,12 +6085,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6302,12 +6102,11 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6319,12 +6118,11 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_uge_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -6335,12 +6133,11 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6353,12 +6150,11 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6370,12 +6166,11 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ult_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -6386,12 +6181,11 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6404,12 +6198,11 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6421,12 +6214,11 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ule_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -6437,12 +6229,11 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6455,12 +6246,11 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6506,9 +6296,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_uno_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v16, v12, v12
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vv v0, v12, v12
+; CHECK-NEXT:    vmfne.vv v4, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -6519,9 +6309,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vf v0, v12, fa0
+; CHECK-NEXT:    vmfne.vv v4, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6534,9 +6324,9 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v12
+; CHECK-NEXT:    vmfne.vf v0, v12, fa0
+; CHECK-NEXT:    vmfne.vv v4, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6583,12 +6373,10 @@ define <vscale x 16 x i1> @fcmp_ogt_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-LABEL: fcmp_ogt_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v8, v8
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -6599,12 +6387,10 @@ define <vscale x 16 x i1> @fcmp_ogt_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6617,12 +6403,10 @@ define <vscale x 16 x i1> @fcmp_ogt_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6634,12 +6418,10 @@ define <vscale x 16 x i1> @fcmp_oge_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-LABEL: fcmp_oge_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v8, v8
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmfle.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -6650,12 +6432,10 @@ define <vscale x 16 x i1> @fcmp_oge_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6668,12 +6448,10 @@ define <vscale x 16 x i1> @fcmp_oge_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6685,12 +6463,10 @@ define <vscale x 16 x i1> @fcmp_olt_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-LABEL: fcmp_olt_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -6701,12 +6477,10 @@ define <vscale x 16 x i1> @fcmp_olt_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6719,12 +6493,10 @@ define <vscale x 16 x i1> @fcmp_olt_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6736,12 +6508,10 @@ define <vscale x 16 x i1> @fcmp_ole_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-LABEL: fcmp_ole_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -6752,12 +6522,10 @@ define <vscale x 16 x i1> @fcmp_ole_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6770,12 +6538,10 @@ define <vscale x 16 x i1> @fcmp_ole_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6787,14 +6553,13 @@ define <vscale x 16 x i1> @fcmp_one_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-LABEL: fcmp_one_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v25, v8, v16, v0.t
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v24, v25
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -6805,14 +6570,13 @@ define <vscale x 16 x i1> @fcmp_one_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6825,14 +6589,13 @@ define <vscale x 16 x i1> @fcmp_one_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6844,9 +6607,9 @@ define <vscale x 16 x i1> @fcmp_ord_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-LABEL: fcmp_ord_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -6857,9 +6620,9 @@ define <vscale x 16 x i1> @fcmp_ord_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6872,9 +6635,9 @@ define <vscale x 16 x i1> @fcmp_ord_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v24, v16
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6886,14 +6649,13 @@ define <vscale x 16 x i1> @fcmp_ueq_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-LABEL: fcmp_ueq_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v25, v8, v16, v0.t
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v24, v25
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -6904,14 +6666,13 @@ define <vscale x 16 x i1> @fcmp_ueq_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6924,14 +6685,13 @@ define <vscale x 16 x i1> @fcmp_ueq_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6943,12 +6703,11 @@ define <vscale x 16 x i1> @fcmp_ugt_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-LABEL: fcmp_ugt_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v16, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -6959,12 +6718,11 @@ define <vscale x 16 x i1> @fcmp_ugt_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6977,12 +6735,11 @@ define <vscale x 16 x i1> @fcmp_ugt_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6994,12 +6751,11 @@ define <vscale x 16 x i1> @fcmp_uge_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-LABEL: fcmp_uge_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v16, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -7010,12 +6766,11 @@ define <vscale x 16 x i1> @fcmp_uge_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -7028,12 +6783,11 @@ define <vscale x 16 x i1> @fcmp_uge_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -7045,12 +6799,11 @@ define <vscale x 16 x i1> @fcmp_ult_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-LABEL: fcmp_ult_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v8, v8
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmfle.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -7061,12 +6814,11 @@ define <vscale x 16 x i1> @fcmp_ult_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -7079,12 +6831,11 @@ define <vscale x 16 x i1> @fcmp_ult_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -7096,12 +6847,11 @@ define <vscale x 16 x i1> @fcmp_ule_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-LABEL: fcmp_ule_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v8, v8
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -7112,12 +6862,11 @@ define <vscale x 16 x i1> @fcmp_ule_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -7130,12 +6879,11 @@ define <vscale x 16 x i1> @fcmp_ule_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -7181,9 +6929,9 @@ define <vscale x 16 x i1> @fcmp_uno_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-LABEL: fcmp_uno_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfne.vv v24, v16, v16
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vv v0, v16, v16
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -7194,9 +6942,9 @@ define <vscale x 16 x i1> @fcmp_uno_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfne.vf v24, v16, fa0
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vf v0, v16, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -7209,9 +6957,9 @@ define <vscale x 16 x i1> @fcmp_uno_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfne.vf v24, v16, fa0
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v24, v16
+; CHECK-NEXT:    vmfne.vf v0, v16, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -7891,12 +7639,10 @@ define <vscale x 2 x i1> @fcmp_ogt_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ogt_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -7907,12 +7653,10 @@ define <vscale x 2 x i1> @fcmp_ogt_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -7925,12 +7669,10 @@ define <vscale x 2 x i1> @fcmp_ogt_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -7942,12 +7684,10 @@ define <vscale x 2 x i1> @fcmp_oge_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_oge_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -7958,12 +7698,10 @@ define <vscale x 2 x i1> @fcmp_oge_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -7976,12 +7714,10 @@ define <vscale x 2 x i1> @fcmp_oge_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -7993,12 +7729,10 @@ define <vscale x 2 x i1> @fcmp_olt_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_olt_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -8009,12 +7743,10 @@ define <vscale x 2 x i1> @fcmp_olt_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8027,12 +7759,10 @@ define <vscale x 2 x i1> @fcmp_olt_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8044,12 +7774,10 @@ define <vscale x 2 x i1> @fcmp_ole_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ole_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -8060,12 +7788,10 @@ define <vscale x 2 x i1> @fcmp_ole_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8078,12 +7804,10 @@ define <vscale x 2 x i1> @fcmp_ole_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8095,14 +7819,13 @@ define <vscale x 2 x i1> @fcmp_one_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_one_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vv v2, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -8113,14 +7836,13 @@ define <vscale x 2 x i1> @fcmp_one_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8133,14 +7855,13 @@ define <vscale x 2 x i1> @fcmp_one_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8152,9 +7873,9 @@ define <vscale x 2 x i1> @fcmp_ord_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ord_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -8165,9 +7886,9 @@ define <vscale x 2 x i1> @fcmp_ord_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8180,9 +7901,9 @@ define <vscale x 2 x i1> @fcmp_ord_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8194,14 +7915,13 @@ define <vscale x 2 x i1> @fcmp_ueq_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ueq_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vv v2, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -8212,14 +7932,13 @@ define <vscale x 2 x i1> @fcmp_ueq_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8232,14 +7951,13 @@ define <vscale x 2 x i1> @fcmp_ueq_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8251,12 +7969,11 @@ define <vscale x 2 x i1> @fcmp_ugt_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ugt_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -8267,12 +7984,11 @@ define <vscale x 2 x i1> @fcmp_ugt_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8285,12 +8001,11 @@ define <vscale x 2 x i1> @fcmp_ugt_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8302,12 +8017,11 @@ define <vscale x 2 x i1> @fcmp_uge_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_uge_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -8318,12 +8032,11 @@ define <vscale x 2 x i1> @fcmp_uge_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8336,12 +8049,11 @@ define <vscale x 2 x i1> @fcmp_uge_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8353,12 +8065,11 @@ define <vscale x 2 x i1> @fcmp_ult_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ult_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -8369,12 +8080,11 @@ define <vscale x 2 x i1> @fcmp_ult_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8387,12 +8097,11 @@ define <vscale x 2 x i1> @fcmp_ult_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8404,12 +8113,11 @@ define <vscale x 2 x i1> @fcmp_ule_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ule_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v2, v10, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -8420,12 +8128,11 @@ define <vscale x 2 x i1> @fcmp_ule_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8438,12 +8145,11 @@ define <vscale x 2 x i1> @fcmp_ule_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v0, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8489,9 +8195,9 @@ define <vscale x 2 x i1> @fcmp_uno_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_uno_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v12, v10, v10
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vv v0, v10, v10
+; CHECK-NEXT:    vmfne.vv v2, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -8502,9 +8208,9 @@ define <vscale x 2 x i1> @fcmp_uno_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vf v0, v10, fa0
+; CHECK-NEXT:    vmfne.vv v2, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8517,9 +8223,9 @@ define <vscale x 2 x i1> @fcmp_uno_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v10
+; CHECK-NEXT:    vmfne.vf v0, v10, fa0
+; CHECK-NEXT:    vmfne.vv v2, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8566,12 +8272,10 @@ define <vscale x 4 x i1> @fcmp_ogt_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ogt_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -8582,12 +8286,10 @@ define <vscale x 4 x i1> @fcmp_ogt_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8600,12 +8302,10 @@ define <vscale x 4 x i1> @fcmp_ogt_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8617,12 +8317,10 @@ define <vscale x 4 x i1> @fcmp_oge_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_oge_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -8633,12 +8331,10 @@ define <vscale x 4 x i1> @fcmp_oge_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8651,12 +8347,10 @@ define <vscale x 4 x i1> @fcmp_oge_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8668,12 +8362,10 @@ define <vscale x 4 x i1> @fcmp_olt_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_olt_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -8684,12 +8376,10 @@ define <vscale x 4 x i1> @fcmp_olt_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8702,12 +8392,10 @@ define <vscale x 4 x i1> @fcmp_olt_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8719,12 +8407,10 @@ define <vscale x 4 x i1> @fcmp_ole_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ole_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -8735,12 +8421,10 @@ define <vscale x 4 x i1> @fcmp_ole_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8753,12 +8437,10 @@ define <vscale x 4 x i1> @fcmp_ole_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8770,14 +8452,13 @@ define <vscale x 4 x i1> @fcmp_one_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_one_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vv v4, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -8788,14 +8469,13 @@ define <vscale x 4 x i1> @fcmp_one_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8808,14 +8488,13 @@ define <vscale x 4 x i1> @fcmp_one_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8827,9 +8506,9 @@ define <vscale x 4 x i1> @fcmp_ord_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ord_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -8840,9 +8519,9 @@ define <vscale x 4 x i1> @fcmp_ord_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8855,9 +8534,9 @@ define <vscale x 4 x i1> @fcmp_ord_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8869,14 +8548,13 @@ define <vscale x 4 x i1> @fcmp_ueq_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ueq_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vv v4, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -8887,14 +8565,13 @@ define <vscale x 4 x i1> @fcmp_ueq_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8907,14 +8584,13 @@ define <vscale x 4 x i1> @fcmp_ueq_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8926,12 +8602,11 @@ define <vscale x 4 x i1> @fcmp_ugt_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ugt_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -8942,12 +8617,11 @@ define <vscale x 4 x i1> @fcmp_ugt_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8960,12 +8634,11 @@ define <vscale x 4 x i1> @fcmp_ugt_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8977,12 +8650,11 @@ define <vscale x 4 x i1> @fcmp_uge_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_uge_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -8993,12 +8665,11 @@ define <vscale x 4 x i1> @fcmp_uge_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -9011,12 +8682,11 @@ define <vscale x 4 x i1> @fcmp_uge_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -9028,12 +8698,11 @@ define <vscale x 4 x i1> @fcmp_ult_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ult_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -9044,12 +8713,11 @@ define <vscale x 4 x i1> @fcmp_ult_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -9062,12 +8730,11 @@ define <vscale x 4 x i1> @fcmp_ult_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -9079,12 +8746,11 @@ define <vscale x 4 x i1> @fcmp_ule_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ule_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v4, v12, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -9095,12 +8761,11 @@ define <vscale x 4 x i1> @fcmp_ule_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -9113,12 +8778,11 @@ define <vscale x 4 x i1> @fcmp_ule_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -9164,9 +8828,9 @@ define <vscale x 4 x i1> @fcmp_uno_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_uno_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v16, v12, v12
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vv v0, v12, v12
+; CHECK-NEXT:    vmfne.vv v4, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -9177,9 +8841,9 @@ define <vscale x 4 x i1> @fcmp_uno_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vf v0, v12, fa0
+; CHECK-NEXT:    vmfne.vv v4, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -9192,9 +8856,9 @@ define <vscale x 4 x i1> @fcmp_uno_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v12
+; CHECK-NEXT:    vmfne.vf v0, v12, fa0
+; CHECK-NEXT:    vmfne.vv v4, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -9241,12 +8905,10 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ogt_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v8, v8
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9257,12 +8919,10 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9275,12 +8935,10 @@ define <vscale x 8 x i1> @fcmp_ogt_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9292,12 +8950,10 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_oge_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v8, v8
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmfle.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9308,12 +8964,10 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9326,12 +8980,10 @@ define <vscale x 8 x i1> @fcmp_oge_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9343,12 +8995,10 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_olt_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9359,12 +9009,10 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9377,12 +9025,10 @@ define <vscale x 8 x i1> @fcmp_olt_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9394,12 +9040,10 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ole_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9410,12 +9054,10 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9428,12 +9070,10 @@ define <vscale x 8 x i1> @fcmp_ole_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9445,14 +9085,13 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_one_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v25, v8, v16, v0.t
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v24, v25
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9463,14 +9102,13 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9483,14 +9121,13 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9502,9 +9139,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ord_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9515,9 +9152,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9530,9 +9167,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v24, v16
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9544,14 +9181,13 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ueq_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v25, v8, v16, v0.t
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v24, v25
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9562,14 +9198,13 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9582,14 +9217,13 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9601,12 +9235,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ugt_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v16, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9617,12 +9250,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9635,12 +9267,11 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9652,12 +9283,11 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_uge_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmflt.vv v0, v8, v16, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9668,12 +9298,11 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9686,12 +9315,11 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9703,12 +9331,11 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ult_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v8, v8
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmfle.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9719,12 +9346,11 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9737,12 +9363,11 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9754,12 +9379,11 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ule_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmfeq.vv v24, v8, v8
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16
+; CHECK-NEXT:    vmand.mm v0, v24, v0
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9770,12 +9394,11 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9788,12 +9411,11 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
+; CHECK-NEXT:    vmfeq.vf v0, v16, fa0
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v16, v0
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9839,9 +9461,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_uno_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vv v24, v16, v16
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vv v0, v16, v16
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9852,9 +9474,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfne.vf v24, v16, fa0
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vf v0, v16, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9867,9 +9489,9 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfne.vf v24, v16, fa0
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v24, v16
+; CHECK-NEXT:    vmfne.vf v0, v16, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcmps-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcmps-constrained-sdnode.ll
index 9a10359228e55d..32e925f489c6bc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcmps-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcmps-constrained-sdnode.ll
@@ -1605,9 +1605,9 @@ define <vscale x 8 x i1> @fcmps_oeq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; CHECK-LABEL: fcmps_oeq_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8
+; CHECK-NEXT:    vmfle.vv v2, v8, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -1617,9 +1617,9 @@ define <vscale x 8 x i1> @fcmps_oeq_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_oeq_vf_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v2, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1631,9 +1631,9 @@ define <vscale x 8 x i1> @fcmps_oeq_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_oeq_fv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v2, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1781,9 +1781,9 @@ define <vscale x 8 x i1> @fcmps_one_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; CHECK-LABEL: fcmps_one_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10
+; CHECK-NEXT:    vmflt.vv v2, v10, v8
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -1793,9 +1793,9 @@ define <vscale x 8 x i1> @fcmps_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_one_vf_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1807,9 +1807,9 @@ define <vscale x 8 x i1> @fcmps_one_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_one_fv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1821,9 +1821,9 @@ define <vscale x 8 x i1> @fcmps_ord_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; CHECK-LABEL: fcmps_ord_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v10
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -1834,9 +1834,9 @@ define <vscale x 8 x i1> @fcmps_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1849,9 +1849,9 @@ define <vscale x 8 x i1> @fcmps_ord_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1863,9 +1863,9 @@ define <vscale x 8 x i1> @fcmps_ueq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; CHECK-LABEL: fcmps_ueq_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10
+; CHECK-NEXT:    vmflt.vv v2, v10, v8
+; CHECK-NEXT:    vmnor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -1875,9 +1875,9 @@ define <vscale x 8 x i1> @fcmps_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_ueq_vf_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1889,9 +1889,9 @@ define <vscale x 8 x i1> @fcmps_ueq_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_ueq_fv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1903,8 +1903,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; CHECK-LABEL: fcmps_ugt_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v0, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -1914,8 +1914,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_ugt_vf_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1927,8 +1927,8 @@ define <vscale x 8 x i1> @fcmps_ugt_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_ugt_fv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1940,8 +1940,8 @@ define <vscale x 8 x i1> @fcmps_uge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; CHECK-LABEL: fcmps_uge_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -1951,8 +1951,8 @@ define <vscale x 8 x i1> @fcmps_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_uge_vf_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1964,8 +1964,8 @@ define <vscale x 8 x i1> @fcmps_uge_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_uge_fv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1977,8 +1977,8 @@ define <vscale x 8 x i1> @fcmps_ult_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; CHECK-LABEL: fcmps_ult_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -1988,8 +1988,8 @@ define <vscale x 8 x i1> @fcmps_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_ult_vf_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2001,8 +2001,8 @@ define <vscale x 8 x i1> @fcmps_ult_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_ult_fv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2014,8 +2014,8 @@ define <vscale x 8 x i1> @fcmps_ule_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; CHECK-LABEL: fcmps_ule_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v0, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2025,8 +2025,8 @@ define <vscale x 8 x i1> @fcmps_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_ule_vf_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2038,8 +2038,8 @@ define <vscale x 8 x i1> @fcmps_ule_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_ule_fv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2051,9 +2051,9 @@ define <vscale x 8 x i1> @fcmps_une_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; CHECK-LABEL: fcmps_une_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8
+; CHECK-NEXT:    vmfle.vv v2, v8, v10
+; CHECK-NEXT:    vmnand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2063,9 +2063,9 @@ define <vscale x 8 x i1> @fcmps_une_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_une_vf_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v2, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2077,9 +2077,9 @@ define <vscale x 8 x i1> @fcmps_une_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_une_fv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v2, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2091,10 +2091,10 @@ define <vscale x 8 x i1> @fcmps_uno_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; CHECK-LABEL: fcmps_uno_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v10
-; CHECK-NEXT:    vmorn.mm v0, v8, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v10
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v2
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2105,10 +2105,10 @@ define <vscale x 8 x i1> @fcmps_uno_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v10
-; CHECK-NEXT:    vmorn.mm v0, v8, v12
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v2
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2121,10 +2121,10 @@ define <vscale x 8 x i1> @fcmps_uno_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmnot.m v10, v12
-; CHECK-NEXT:    vmfle.vv v11, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v10, v11
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmnot.m v10, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v10, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2137,9 +2137,9 @@ define <vscale x 16 x i1> @fcmps_oeq_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_oeq_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8
+; CHECK-NEXT:    vmfle.vv v4, v8, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2149,9 +2149,9 @@ define <vscale x 16 x i1> @fcmps_oeq_vf_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_oeq_vf_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v4, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2163,9 +2163,9 @@ define <vscale x 16 x i1> @fcmps_oeq_fv_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_oeq_fv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v4, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2313,9 +2313,9 @@ define <vscale x 16 x i1> @fcmps_one_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_one_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
+; CHECK-NEXT:    vmflt.vv v4, v12, v8
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2325,9 +2325,9 @@ define <vscale x 16 x i1> @fcmps_one_vf_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_one_vf_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2339,9 +2339,9 @@ define <vscale x 16 x i1> @fcmps_one_fv_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_one_fv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2353,9 +2353,9 @@ define <vscale x 16 x i1> @fcmps_ord_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_ord_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v12
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2366,9 +2366,9 @@ define <vscale x 16 x i1> @fcmps_ord_vf_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vf v0, v12, fa0
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2381,9 +2381,9 @@ define <vscale x 16 x i1> @fcmps_ord_fv_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfle.vf v0, v12, fa0
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2395,9 +2395,9 @@ define <vscale x 16 x i1> @fcmps_ueq_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_ueq_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
+; CHECK-NEXT:    vmflt.vv v4, v12, v8
+; CHECK-NEXT:    vmnor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2407,9 +2407,9 @@ define <vscale x 16 x i1> @fcmps_ueq_vf_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_ueq_vf_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2421,9 +2421,9 @@ define <vscale x 16 x i1> @fcmps_ueq_fv_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_ueq_fv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2435,8 +2435,8 @@ define <vscale x 16 x i1> @fcmps_ugt_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_ugt_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v0, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2446,8 +2446,8 @@ define <vscale x 16 x i1> @fcmps_ugt_vf_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_ugt_vf_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2459,8 +2459,8 @@ define <vscale x 16 x i1> @fcmps_ugt_fv_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_ugt_fv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2472,8 +2472,8 @@ define <vscale x 16 x i1> @fcmps_uge_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_uge_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2483,8 +2483,8 @@ define <vscale x 16 x i1> @fcmps_uge_vf_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_uge_vf_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2496,8 +2496,8 @@ define <vscale x 16 x i1> @fcmps_uge_fv_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_uge_fv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2509,8 +2509,8 @@ define <vscale x 16 x i1> @fcmps_ult_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_ult_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2520,8 +2520,8 @@ define <vscale x 16 x i1> @fcmps_ult_vf_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_ult_vf_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2533,8 +2533,8 @@ define <vscale x 16 x i1> @fcmps_ult_fv_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_ult_fv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2546,8 +2546,8 @@ define <vscale x 16 x i1> @fcmps_ule_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_ule_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v0, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2557,8 +2557,8 @@ define <vscale x 16 x i1> @fcmps_ule_vf_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_ule_vf_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2570,8 +2570,8 @@ define <vscale x 16 x i1> @fcmps_ule_fv_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_ule_fv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2583,9 +2583,9 @@ define <vscale x 16 x i1> @fcmps_une_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_une_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8
+; CHECK-NEXT:    vmfle.vv v4, v8, v12
+; CHECK-NEXT:    vmnand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2595,9 +2595,9 @@ define <vscale x 16 x i1> @fcmps_une_vf_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_une_vf_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v4, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2609,9 +2609,9 @@ define <vscale x 16 x i1> @fcmps_une_fv_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_une_fv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v4, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2623,10 +2623,10 @@ define <vscale x 16 x i1> @fcmps_uno_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_uno_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v12
-; CHECK-NEXT:    vmorn.mm v0, v8, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v12
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v4
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2637,10 +2637,10 @@ define <vscale x 16 x i1> @fcmps_uno_vf_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v12
-; CHECK-NEXT:    vmorn.mm v0, v8, v16
+; CHECK-NEXT:    vmfle.vf v0, v12, fa0
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v4
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2653,10 +2653,10 @@ define <vscale x 16 x i1> @fcmps_uno_fv_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmnot.m v12, v16
-; CHECK-NEXT:    vmfle.vv v13, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v12, v13
+; CHECK-NEXT:    vmfle.vf v0, v12, fa0
+; CHECK-NEXT:    vmnot.m v12, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v12, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2669,9 +2669,9 @@ define <vscale x 32 x i1> @fcmps_oeq_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_oeq_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v8
-; CHECK-NEXT:    vmfle.vv v25, v8, v16
-; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v8
+; CHECK-NEXT:    vmfle.vv v8, v8, v16
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -2681,9 +2681,9 @@ define <vscale x 32 x i1> @fcmps_oeq_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_oeq_vf_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmfle.vf v17, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -2695,9 +2695,9 @@ define <vscale x 32 x i1> @fcmps_oeq_fv_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_oeq_fv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmfge.vf v17, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -2845,9 +2845,9 @@ define <vscale x 32 x i1> @fcmps_one_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_one_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmflt.vv v25, v16, v8
-; CHECK-NEXT:    vmor.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v8, v16
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -2857,9 +2857,9 @@ define <vscale x 32 x i1> @fcmps_one_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_one_vf_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -2871,9 +2871,9 @@ define <vscale x 32 x i1> @fcmps_one_fv_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_one_fv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -2885,9 +2885,9 @@ define <vscale x 32 x i1> @fcmps_ord_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_ord_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -2898,9 +2898,9 @@ define <vscale x 32 x i1> @fcmps_ord_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vf v24, v16, fa0
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfle.vf v0, v16, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -2913,9 +2913,9 @@ define <vscale x 32 x i1> @fcmps_ord_fv_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vf v24, v16, fa0
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v24, v16
+; CHECK-NEXT:    vmfle.vf v0, v16, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -2927,9 +2927,9 @@ define <vscale x 32 x i1> @fcmps_ueq_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_ueq_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmflt.vv v25, v16, v8
-; CHECK-NEXT:    vmnor.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v8, v16
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmnor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -2939,9 +2939,9 @@ define <vscale x 32 x i1> @fcmps_ueq_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_ueq_vf_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -2953,9 +2953,9 @@ define <vscale x 32 x i1> @fcmps_ueq_fv_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_ueq_fv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -2967,8 +2967,8 @@ define <vscale x 32 x i1> @fcmps_ugt_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_ugt_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v8, v16
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfle.vv v0, v8, v16
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -2978,8 +2978,8 @@ define <vscale x 32 x i1> @fcmps_ugt_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_ugt_vf_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -2991,8 +2991,8 @@ define <vscale x 32 x i1> @fcmps_ugt_fv_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_ugt_fv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3004,8 +3004,8 @@ define <vscale x 32 x i1> @fcmps_uge_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_uge_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmflt.vv v0, v8, v16
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3015,8 +3015,8 @@ define <vscale x 32 x i1> @fcmps_uge_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_uge_vf_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3028,8 +3028,8 @@ define <vscale x 32 x i1> @fcmps_uge_fv_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_uge_fv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3041,8 +3041,8 @@ define <vscale x 32 x i1> @fcmps_ult_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_ult_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v8
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3052,8 +3052,8 @@ define <vscale x 32 x i1> @fcmps_ult_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_ult_vf_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3065,8 +3065,8 @@ define <vscale x 32 x i1> @fcmps_ult_fv_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_ult_fv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3078,8 +3078,8 @@ define <vscale x 32 x i1> @fcmps_ule_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_ule_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v16, v8
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmflt.vv v0, v16, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3089,8 +3089,8 @@ define <vscale x 32 x i1> @fcmps_ule_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_ule_vf_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3102,8 +3102,8 @@ define <vscale x 32 x i1> @fcmps_ule_fv_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_ule_fv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3115,9 +3115,9 @@ define <vscale x 32 x i1> @fcmps_une_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_une_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v8
-; CHECK-NEXT:    vmfle.vv v25, v8, v16
-; CHECK-NEXT:    vmnand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v8
+; CHECK-NEXT:    vmfle.vv v8, v8, v16
+; CHECK-NEXT:    vmnand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3127,9 +3127,9 @@ define <vscale x 32 x i1> @fcmps_une_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_une_vf_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmfle.vf v17, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3141,9 +3141,9 @@ define <vscale x 32 x i1> @fcmps_une_fv_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_une_fv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmfge.vf v17, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3155,10 +3155,10 @@ define <vscale x 32 x i1> @fcmps_uno_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_uno_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v16
-; CHECK-NEXT:    vmorn.mm v0, v8, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3169,10 +3169,10 @@ define <vscale x 32 x i1> @fcmps_uno_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vf v24, v16, fa0
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v16
-; CHECK-NEXT:    vmorn.mm v0, v8, v24
+; CHECK-NEXT:    vmfle.vf v0, v16, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3185,10 +3185,10 @@ define <vscale x 32 x i1> @fcmps_uno_fv_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vf v24, v16, fa0
-; CHECK-NEXT:    vmnot.m v16, v24
-; CHECK-NEXT:    vmfle.vv v17, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v16, v17
+; CHECK-NEXT:    vmfle.vf v0, v16, fa0
+; CHECK-NEXT:    vmnot.m v16, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v16, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -4265,9 +4265,9 @@ define <vscale x 4 x i1> @fcmps_oeq_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_oeq_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8
+; CHECK-NEXT:    vmfle.vv v2, v8, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -4277,9 +4277,9 @@ define <vscale x 4 x i1> @fcmps_oeq_vf_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_oeq_vf_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v2, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4291,9 +4291,9 @@ define <vscale x 4 x i1> @fcmps_oeq_fv_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_oeq_fv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v2, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4441,9 +4441,9 @@ define <vscale x 4 x i1> @fcmps_one_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_one_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10
+; CHECK-NEXT:    vmflt.vv v2, v10, v8
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -4453,9 +4453,9 @@ define <vscale x 4 x i1> @fcmps_one_vf_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_one_vf_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4467,9 +4467,9 @@ define <vscale x 4 x i1> @fcmps_one_fv_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_one_fv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4481,9 +4481,9 @@ define <vscale x 4 x i1> @fcmps_ord_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_ord_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v10
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -4494,9 +4494,9 @@ define <vscale x 4 x i1> @fcmps_ord_vf_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4509,9 +4509,9 @@ define <vscale x 4 x i1> @fcmps_ord_fv_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4523,9 +4523,9 @@ define <vscale x 4 x i1> @fcmps_ueq_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_ueq_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10
+; CHECK-NEXT:    vmflt.vv v2, v10, v8
+; CHECK-NEXT:    vmnor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -4535,9 +4535,9 @@ define <vscale x 4 x i1> @fcmps_ueq_vf_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_ueq_vf_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4549,9 +4549,9 @@ define <vscale x 4 x i1> @fcmps_ueq_fv_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_ueq_fv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4563,8 +4563,8 @@ define <vscale x 4 x i1> @fcmps_ugt_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_ugt_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v0, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -4574,8 +4574,8 @@ define <vscale x 4 x i1> @fcmps_ugt_vf_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_ugt_vf_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4587,8 +4587,8 @@ define <vscale x 4 x i1> @fcmps_ugt_fv_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_ugt_fv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4600,8 +4600,8 @@ define <vscale x 4 x i1> @fcmps_uge_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_uge_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -4611,8 +4611,8 @@ define <vscale x 4 x i1> @fcmps_uge_vf_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_uge_vf_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4624,8 +4624,8 @@ define <vscale x 4 x i1> @fcmps_uge_fv_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_uge_fv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4637,8 +4637,8 @@ define <vscale x 4 x i1> @fcmps_ult_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_ult_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -4648,8 +4648,8 @@ define <vscale x 4 x i1> @fcmps_ult_vf_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_ult_vf_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4661,8 +4661,8 @@ define <vscale x 4 x i1> @fcmps_ult_fv_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_ult_fv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4674,8 +4674,8 @@ define <vscale x 4 x i1> @fcmps_ule_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_ule_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v0, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -4685,8 +4685,8 @@ define <vscale x 4 x i1> @fcmps_ule_vf_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_ule_vf_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4698,8 +4698,8 @@ define <vscale x 4 x i1> @fcmps_ule_fv_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_ule_fv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4711,9 +4711,9 @@ define <vscale x 4 x i1> @fcmps_une_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_une_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8
+; CHECK-NEXT:    vmfle.vv v2, v8, v10
+; CHECK-NEXT:    vmnand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -4723,9 +4723,9 @@ define <vscale x 4 x i1> @fcmps_une_vf_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_une_vf_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v2, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4737,9 +4737,9 @@ define <vscale x 4 x i1> @fcmps_une_fv_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_une_fv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v2, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4751,10 +4751,10 @@ define <vscale x 4 x i1> @fcmps_uno_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_uno_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v10
-; CHECK-NEXT:    vmorn.mm v0, v8, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v10
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v2
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -4765,10 +4765,10 @@ define <vscale x 4 x i1> @fcmps_uno_vf_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v10
-; CHECK-NEXT:    vmorn.mm v0, v8, v12
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v2
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4781,10 +4781,10 @@ define <vscale x 4 x i1> @fcmps_uno_fv_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmnot.m v10, v12
-; CHECK-NEXT:    vmfle.vv v11, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v10, v11
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmnot.m v10, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v10, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4797,9 +4797,9 @@ define <vscale x 8 x i1> @fcmps_oeq_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_oeq_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8
+; CHECK-NEXT:    vmfle.vv v4, v8, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -4809,9 +4809,9 @@ define <vscale x 8 x i1> @fcmps_oeq_vf_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_oeq_vf_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v4, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -4823,9 +4823,9 @@ define <vscale x 8 x i1> @fcmps_oeq_fv_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_oeq_fv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v4, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -4973,9 +4973,9 @@ define <vscale x 8 x i1> @fcmps_one_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_one_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
+; CHECK-NEXT:    vmflt.vv v4, v12, v8
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -4985,9 +4985,9 @@ define <vscale x 8 x i1> @fcmps_one_vf_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_one_vf_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -4999,9 +4999,9 @@ define <vscale x 8 x i1> @fcmps_one_fv_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_one_fv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5013,9 +5013,9 @@ define <vscale x 8 x i1> @fcmps_ord_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_ord_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v12
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -5026,9 +5026,9 @@ define <vscale x 8 x i1> @fcmps_ord_vf_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vf v0, v12, fa0
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5041,9 +5041,9 @@ define <vscale x 8 x i1> @fcmps_ord_fv_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfle.vf v0, v12, fa0
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5055,9 +5055,9 @@ define <vscale x 8 x i1> @fcmps_ueq_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_ueq_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
+; CHECK-NEXT:    vmflt.vv v4, v12, v8
+; CHECK-NEXT:    vmnor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -5067,9 +5067,9 @@ define <vscale x 8 x i1> @fcmps_ueq_vf_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_ueq_vf_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5081,9 +5081,9 @@ define <vscale x 8 x i1> @fcmps_ueq_fv_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_ueq_fv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5095,8 +5095,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_ugt_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v0, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -5106,8 +5106,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vf_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_ugt_vf_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5119,8 +5119,8 @@ define <vscale x 8 x i1> @fcmps_ugt_fv_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_ugt_fv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5132,8 +5132,8 @@ define <vscale x 8 x i1> @fcmps_uge_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_uge_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -5143,8 +5143,8 @@ define <vscale x 8 x i1> @fcmps_uge_vf_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_uge_vf_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5156,8 +5156,8 @@ define <vscale x 8 x i1> @fcmps_uge_fv_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_uge_fv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5169,8 +5169,8 @@ define <vscale x 8 x i1> @fcmps_ult_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_ult_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -5180,8 +5180,8 @@ define <vscale x 8 x i1> @fcmps_ult_vf_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_ult_vf_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5193,8 +5193,8 @@ define <vscale x 8 x i1> @fcmps_ult_fv_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_ult_fv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5206,8 +5206,8 @@ define <vscale x 8 x i1> @fcmps_ule_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_ule_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v0, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -5217,8 +5217,8 @@ define <vscale x 8 x i1> @fcmps_ule_vf_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_ule_vf_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5230,8 +5230,8 @@ define <vscale x 8 x i1> @fcmps_ule_fv_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_ule_fv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5243,9 +5243,9 @@ define <vscale x 8 x i1> @fcmps_une_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_une_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8
+; CHECK-NEXT:    vmfle.vv v4, v8, v12
+; CHECK-NEXT:    vmnand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -5255,9 +5255,9 @@ define <vscale x 8 x i1> @fcmps_une_vf_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_une_vf_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v4, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5269,9 +5269,9 @@ define <vscale x 8 x i1> @fcmps_une_fv_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_une_fv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v4, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5283,10 +5283,10 @@ define <vscale x 8 x i1> @fcmps_uno_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_uno_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v12
-; CHECK-NEXT:    vmorn.mm v0, v8, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v12
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v4
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -5297,10 +5297,10 @@ define <vscale x 8 x i1> @fcmps_uno_vf_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v12
-; CHECK-NEXT:    vmorn.mm v0, v8, v16
+; CHECK-NEXT:    vmfle.vf v0, v12, fa0
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v4
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5313,10 +5313,10 @@ define <vscale x 8 x i1> @fcmps_uno_fv_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmnot.m v12, v16
-; CHECK-NEXT:    vmfle.vv v13, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v12, v13
+; CHECK-NEXT:    vmfle.vf v0, v12, fa0
+; CHECK-NEXT:    vmnot.m v12, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v12, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5329,9 +5329,9 @@ define <vscale x 16 x i1> @fcmps_oeq_vv_nxv16f32(<vscale x 16 x float> %va, <vsc
 ; CHECK-LABEL: fcmps_oeq_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v8
-; CHECK-NEXT:    vmfle.vv v25, v8, v16
-; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v8
+; CHECK-NEXT:    vmfle.vv v8, v8, v16
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -5341,9 +5341,9 @@ define <vscale x 16 x i1> @fcmps_oeq_vf_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_oeq_vf_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmfle.vf v17, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5355,9 +5355,9 @@ define <vscale x 16 x i1> @fcmps_oeq_fv_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_oeq_fv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmfge.vf v17, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5505,9 +5505,9 @@ define <vscale x 16 x i1> @fcmps_one_vv_nxv16f32(<vscale x 16 x float> %va, <vsc
 ; CHECK-LABEL: fcmps_one_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmflt.vv v25, v16, v8
-; CHECK-NEXT:    vmor.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v8, v16
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -5517,9 +5517,9 @@ define <vscale x 16 x i1> @fcmps_one_vf_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_one_vf_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5531,9 +5531,9 @@ define <vscale x 16 x i1> @fcmps_one_fv_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_one_fv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5545,9 +5545,9 @@ define <vscale x 16 x i1> @fcmps_ord_vv_nxv16f32(<vscale x 16 x float> %va, <vsc
 ; CHECK-LABEL: fcmps_ord_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -5558,9 +5558,9 @@ define <vscale x 16 x i1> @fcmps_ord_vf_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vf v24, v16, fa0
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfle.vf v0, v16, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5573,9 +5573,9 @@ define <vscale x 16 x i1> @fcmps_ord_fv_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vf v24, v16, fa0
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v24, v16
+; CHECK-NEXT:    vmfle.vf v0, v16, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5587,9 +5587,9 @@ define <vscale x 16 x i1> @fcmps_ueq_vv_nxv16f32(<vscale x 16 x float> %va, <vsc
 ; CHECK-LABEL: fcmps_ueq_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmflt.vv v25, v16, v8
-; CHECK-NEXT:    vmnor.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v8, v16
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmnor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -5599,9 +5599,9 @@ define <vscale x 16 x i1> @fcmps_ueq_vf_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_ueq_vf_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5613,9 +5613,9 @@ define <vscale x 16 x i1> @fcmps_ueq_fv_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_ueq_fv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5627,8 +5627,8 @@ define <vscale x 16 x i1> @fcmps_ugt_vv_nxv16f32(<vscale x 16 x float> %va, <vsc
 ; CHECK-LABEL: fcmps_ugt_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v8, v16
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfle.vv v0, v8, v16
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -5638,8 +5638,8 @@ define <vscale x 16 x i1> @fcmps_ugt_vf_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_ugt_vf_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5651,8 +5651,8 @@ define <vscale x 16 x i1> @fcmps_ugt_fv_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_ugt_fv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5664,8 +5664,8 @@ define <vscale x 16 x i1> @fcmps_uge_vv_nxv16f32(<vscale x 16 x float> %va, <vsc
 ; CHECK-LABEL: fcmps_uge_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmflt.vv v0, v8, v16
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -5675,8 +5675,8 @@ define <vscale x 16 x i1> @fcmps_uge_vf_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_uge_vf_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5688,8 +5688,8 @@ define <vscale x 16 x i1> @fcmps_uge_fv_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_uge_fv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5701,8 +5701,8 @@ define <vscale x 16 x i1> @fcmps_ult_vv_nxv16f32(<vscale x 16 x float> %va, <vsc
 ; CHECK-LABEL: fcmps_ult_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v8
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -5712,8 +5712,8 @@ define <vscale x 16 x i1> @fcmps_ult_vf_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_ult_vf_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5725,8 +5725,8 @@ define <vscale x 16 x i1> @fcmps_ult_fv_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_ult_fv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5738,8 +5738,8 @@ define <vscale x 16 x i1> @fcmps_ule_vv_nxv16f32(<vscale x 16 x float> %va, <vsc
 ; CHECK-LABEL: fcmps_ule_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v16, v8
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmflt.vv v0, v16, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -5749,8 +5749,8 @@ define <vscale x 16 x i1> @fcmps_ule_vf_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_ule_vf_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5762,8 +5762,8 @@ define <vscale x 16 x i1> @fcmps_ule_fv_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_ule_fv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5775,9 +5775,9 @@ define <vscale x 16 x i1> @fcmps_une_vv_nxv16f32(<vscale x 16 x float> %va, <vsc
 ; CHECK-LABEL: fcmps_une_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v8
-; CHECK-NEXT:    vmfle.vv v25, v8, v16
-; CHECK-NEXT:    vmnand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v8
+; CHECK-NEXT:    vmfle.vv v8, v8, v16
+; CHECK-NEXT:    vmnand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -5787,9 +5787,9 @@ define <vscale x 16 x i1> @fcmps_une_vf_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_une_vf_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmfle.vf v17, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5801,9 +5801,9 @@ define <vscale x 16 x i1> @fcmps_une_fv_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_une_fv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmfge.vf v17, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5815,10 +5815,10 @@ define <vscale x 16 x i1> @fcmps_uno_vv_nxv16f32(<vscale x 16 x float> %va, <vsc
 ; CHECK-LABEL: fcmps_uno_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v16
-; CHECK-NEXT:    vmorn.mm v0, v8, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -5829,10 +5829,10 @@ define <vscale x 16 x i1> @fcmps_uno_vf_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vf v24, v16, fa0
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v16
-; CHECK-NEXT:    vmorn.mm v0, v8, v24
+; CHECK-NEXT:    vmfle.vf v0, v16, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5845,10 +5845,10 @@ define <vscale x 16 x i1> @fcmps_uno_fv_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vf v24, v16, fa0
-; CHECK-NEXT:    vmnot.m v16, v24
-; CHECK-NEXT:    vmfle.vv v17, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v16, v17
+; CHECK-NEXT:    vmfle.vf v0, v16, fa0
+; CHECK-NEXT:    vmnot.m v16, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v16, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6393,9 +6393,9 @@ define <vscale x 2 x i1> @fcmps_oeq_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_oeq_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8
+; CHECK-NEXT:    vmfle.vv v2, v8, v10
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -6405,9 +6405,9 @@ define <vscale x 2 x i1> @fcmps_oeq_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_oeq_vf_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v2, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6419,9 +6419,9 @@ define <vscale x 2 x i1> @fcmps_oeq_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_oeq_fv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v2, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6569,9 +6569,9 @@ define <vscale x 2 x i1> @fcmps_one_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_one_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10
+; CHECK-NEXT:    vmflt.vv v2, v10, v8
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -6581,9 +6581,9 @@ define <vscale x 2 x i1> @fcmps_one_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_one_vf_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6595,9 +6595,9 @@ define <vscale x 2 x i1> @fcmps_one_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_one_fv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6609,9 +6609,9 @@ define <vscale x 2 x i1> @fcmps_ord_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ord_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v10
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -6622,9 +6622,9 @@ define <vscale x 2 x i1> @fcmps_ord_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6637,9 +6637,9 @@ define <vscale x 2 x i1> @fcmps_ord_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v2
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6651,9 +6651,9 @@ define <vscale x 2 x i1> @fcmps_ueq_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ueq_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10
+; CHECK-NEXT:    vmflt.vv v2, v10, v8
+; CHECK-NEXT:    vmnor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -6663,9 +6663,9 @@ define <vscale x 2 x i1> @fcmps_ueq_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_ueq_vf_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6677,9 +6677,9 @@ define <vscale x 2 x i1> @fcmps_ueq_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_ueq_fv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6691,8 +6691,8 @@ define <vscale x 2 x i1> @fcmps_ugt_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ugt_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v0, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -6702,8 +6702,8 @@ define <vscale x 2 x i1> @fcmps_ugt_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_ugt_vf_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6715,8 +6715,8 @@ define <vscale x 2 x i1> @fcmps_ugt_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_ugt_fv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6728,8 +6728,8 @@ define <vscale x 2 x i1> @fcmps_uge_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_uge_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -6739,8 +6739,8 @@ define <vscale x 2 x i1> @fcmps_uge_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_uge_vf_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6752,8 +6752,8 @@ define <vscale x 2 x i1> @fcmps_uge_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_uge_fv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6765,8 +6765,8 @@ define <vscale x 2 x i1> @fcmps_ult_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ult_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -6776,8 +6776,8 @@ define <vscale x 2 x i1> @fcmps_ult_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_ult_vf_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6789,8 +6789,8 @@ define <vscale x 2 x i1> @fcmps_ult_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_ult_fv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6802,8 +6802,8 @@ define <vscale x 2 x i1> @fcmps_ule_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ule_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v0, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -6813,8 +6813,8 @@ define <vscale x 2 x i1> @fcmps_ule_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_ule_vf_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6826,8 +6826,8 @@ define <vscale x 2 x i1> @fcmps_ule_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_ule_fv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6839,9 +6839,9 @@ define <vscale x 2 x i1> @fcmps_une_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_une_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8
+; CHECK-NEXT:    vmfle.vv v2, v8, v10
+; CHECK-NEXT:    vmnand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -6851,9 +6851,9 @@ define <vscale x 2 x i1> @fcmps_une_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_une_vf_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v2, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6865,9 +6865,9 @@ define <vscale x 2 x i1> @fcmps_une_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_une_fv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v2, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v2, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6879,10 +6879,10 @@ define <vscale x 2 x i1> @fcmps_uno_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_uno_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v10
-; CHECK-NEXT:    vmorn.mm v0, v8, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v10
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v2
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -6893,10 +6893,10 @@ define <vscale x 2 x i1> @fcmps_uno_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v10
-; CHECK-NEXT:    vmorn.mm v0, v8, v12
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmfle.vv v2, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v2
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6909,10 +6909,10 @@ define <vscale x 2 x i1> @fcmps_uno_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmnot.m v10, v12
-; CHECK-NEXT:    vmfle.vv v11, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v10, v11
+; CHECK-NEXT:    vmfle.vf v0, v10, fa0
+; CHECK-NEXT:    vmnot.m v10, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v10, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6925,9 +6925,9 @@ define <vscale x 4 x i1> @fcmps_oeq_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_oeq_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8
+; CHECK-NEXT:    vmfle.vv v4, v8, v12
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -6937,9 +6937,9 @@ define <vscale x 4 x i1> @fcmps_oeq_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_oeq_vf_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v4, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -6951,9 +6951,9 @@ define <vscale x 4 x i1> @fcmps_oeq_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_oeq_fv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v4, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7101,9 +7101,9 @@ define <vscale x 4 x i1> @fcmps_one_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_one_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
+; CHECK-NEXT:    vmflt.vv v4, v12, v8
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -7113,9 +7113,9 @@ define <vscale x 4 x i1> @fcmps_one_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_one_vf_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7127,9 +7127,9 @@ define <vscale x 4 x i1> @fcmps_one_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_one_fv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7141,9 +7141,9 @@ define <vscale x 4 x i1> @fcmps_ord_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ord_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v12
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -7154,9 +7154,9 @@ define <vscale x 4 x i1> @fcmps_ord_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vf v0, v12, fa0
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7169,9 +7169,9 @@ define <vscale x 4 x i1> @fcmps_ord_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfle.vf v0, v12, fa0
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v4
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7183,9 +7183,9 @@ define <vscale x 4 x i1> @fcmps_ueq_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ueq_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
+; CHECK-NEXT:    vmflt.vv v4, v12, v8
+; CHECK-NEXT:    vmnor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -7195,9 +7195,9 @@ define <vscale x 4 x i1> @fcmps_ueq_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_ueq_vf_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7209,9 +7209,9 @@ define <vscale x 4 x i1> @fcmps_ueq_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_ueq_fv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7223,8 +7223,8 @@ define <vscale x 4 x i1> @fcmps_ugt_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ugt_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v0, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -7234,8 +7234,8 @@ define <vscale x 4 x i1> @fcmps_ugt_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_ugt_vf_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7247,8 +7247,8 @@ define <vscale x 4 x i1> @fcmps_ugt_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_ugt_fv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7260,8 +7260,8 @@ define <vscale x 4 x i1> @fcmps_uge_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_uge_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -7271,8 +7271,8 @@ define <vscale x 4 x i1> @fcmps_uge_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_uge_vf_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7284,8 +7284,8 @@ define <vscale x 4 x i1> @fcmps_uge_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_uge_fv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7297,8 +7297,8 @@ define <vscale x 4 x i1> @fcmps_ult_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ult_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -7308,8 +7308,8 @@ define <vscale x 4 x i1> @fcmps_ult_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_ult_vf_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7321,8 +7321,8 @@ define <vscale x 4 x i1> @fcmps_ult_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_ult_fv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7334,8 +7334,8 @@ define <vscale x 4 x i1> @fcmps_ule_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ule_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v0, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -7345,8 +7345,8 @@ define <vscale x 4 x i1> @fcmps_ule_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_ule_vf_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7358,8 +7358,8 @@ define <vscale x 4 x i1> @fcmps_ule_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_ule_fv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7371,9 +7371,9 @@ define <vscale x 4 x i1> @fcmps_une_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_une_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8
+; CHECK-NEXT:    vmfle.vv v4, v8, v12
+; CHECK-NEXT:    vmnand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -7383,9 +7383,9 @@ define <vscale x 4 x i1> @fcmps_une_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_une_vf_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v4, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7397,9 +7397,9 @@ define <vscale x 4 x i1> @fcmps_une_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_une_fv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v4, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v4, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7411,10 +7411,10 @@ define <vscale x 4 x i1> @fcmps_uno_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_uno_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v12
-; CHECK-NEXT:    vmorn.mm v0, v8, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v12
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v4
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -7425,10 +7425,10 @@ define <vscale x 4 x i1> @fcmps_uno_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v12
-; CHECK-NEXT:    vmorn.mm v0, v8, v16
+; CHECK-NEXT:    vmfle.vf v0, v12, fa0
+; CHECK-NEXT:    vmfle.vv v4, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v4
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7441,10 +7441,10 @@ define <vscale x 4 x i1> @fcmps_uno_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmnot.m v12, v16
-; CHECK-NEXT:    vmfle.vv v13, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v12, v13
+; CHECK-NEXT:    vmfle.vf v0, v12, fa0
+; CHECK-NEXT:    vmnot.m v12, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v12, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7457,9 +7457,9 @@ define <vscale x 8 x i1> @fcmps_oeq_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_oeq_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v8
-; CHECK-NEXT:    vmfle.vv v25, v8, v16
-; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v8
+; CHECK-NEXT:    vmfle.vv v8, v8, v16
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -7469,9 +7469,9 @@ define <vscale x 8 x i1> @fcmps_oeq_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_oeq_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmfle.vf v17, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7483,9 +7483,9 @@ define <vscale x 8 x i1> @fcmps_oeq_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_oeq_fv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmfge.vf v17, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7633,9 +7633,9 @@ define <vscale x 8 x i1> @fcmps_one_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_one_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmflt.vv v25, v16, v8
-; CHECK-NEXT:    vmor.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v8, v16
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -7645,9 +7645,9 @@ define <vscale x 8 x i1> @fcmps_one_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_one_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7659,9 +7659,9 @@ define <vscale x 8 x i1> @fcmps_one_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_one_fv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7673,9 +7673,9 @@ define <vscale x 8 x i1> @fcmps_ord_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ord_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -7686,9 +7686,9 @@ define <vscale x 8 x i1> @fcmps_ord_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vf v24, v16, fa0
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfle.vf v0, v16, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7701,9 +7701,9 @@ define <vscale x 8 x i1> @fcmps_ord_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vf v24, v16, fa0
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v24, v16
+; CHECK-NEXT:    vmfle.vf v0, v16, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7715,9 +7715,9 @@ define <vscale x 8 x i1> @fcmps_ueq_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ueq_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmflt.vv v25, v16, v8
-; CHECK-NEXT:    vmnor.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v8, v16
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmnor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -7727,9 +7727,9 @@ define <vscale x 8 x i1> @fcmps_ueq_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_ueq_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7741,9 +7741,9 @@ define <vscale x 8 x i1> @fcmps_ueq_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_ueq_fv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7755,8 +7755,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ugt_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v8, v16
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfle.vv v0, v8, v16
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -7766,8 +7766,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_ugt_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7779,8 +7779,8 @@ define <vscale x 8 x i1> @fcmps_ugt_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_ugt_fv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7792,8 +7792,8 @@ define <vscale x 8 x i1> @fcmps_uge_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_uge_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmflt.vv v0, v8, v16
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -7803,8 +7803,8 @@ define <vscale x 8 x i1> @fcmps_uge_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_uge_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7816,8 +7816,8 @@ define <vscale x 8 x i1> @fcmps_uge_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_uge_fv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7829,8 +7829,8 @@ define <vscale x 8 x i1> @fcmps_ult_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ult_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v8
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -7840,8 +7840,8 @@ define <vscale x 8 x i1> @fcmps_ult_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_ult_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7853,8 +7853,8 @@ define <vscale x 8 x i1> @fcmps_ult_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_ult_fv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7866,8 +7866,8 @@ define <vscale x 8 x i1> @fcmps_ule_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ule_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v16, v8
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmflt.vv v0, v16, v8
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -7877,8 +7877,8 @@ define <vscale x 8 x i1> @fcmps_ule_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_ule_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7890,8 +7890,8 @@ define <vscale x 8 x i1> @fcmps_ule_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_ule_fv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7903,9 +7903,9 @@ define <vscale x 8 x i1> @fcmps_une_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_une_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v8
-; CHECK-NEXT:    vmfle.vv v25, v8, v16
-; CHECK-NEXT:    vmnand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v8
+; CHECK-NEXT:    vmfle.vv v8, v8, v16
+; CHECK-NEXT:    vmnand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -7915,9 +7915,9 @@ define <vscale x 8 x i1> @fcmps_une_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_une_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmfle.vf v17, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7929,9 +7929,9 @@ define <vscale x 8 x i1> @fcmps_une_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_une_fv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmfge.vf v17, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7943,10 +7943,10 @@ define <vscale x 8 x i1> @fcmps_uno_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_uno_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v16
-; CHECK-NEXT:    vmorn.mm v0, v8, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -7957,10 +7957,10 @@ define <vscale x 8 x i1> @fcmps_uno_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vf v24, v16, fa0
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v16
-; CHECK-NEXT:    vmorn.mm v0, v8, v24
+; CHECK-NEXT:    vmfle.vf v0, v16, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7973,10 +7973,10 @@ define <vscale x 8 x i1> @fcmps_uno_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vf v24, v16, fa0
-; CHECK-NEXT:    vmnot.m v16, v24
-; CHECK-NEXT:    vmfle.vv v17, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v16, v17
+; CHECK-NEXT:    vmfle.vf v0, v16, fa0
+; CHECK-NEXT:    vmnot.m v16, v0
+; CHECK-NEXT:    vmfle.vv v0, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v16, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
index b888fde7d06836..63fa6e001cdaac 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
@@ -932,13 +932,13 @@ define <vscale x 32 x i1> @vfptosi_nxv32f16_nxv32i1(<vscale x 32 x half> %va) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfncvt.rtz.x.f.w v12, v16
 ; ZVFHMIN-NEXT:    vand.vi v12, v12, 1
-; ZVFHMIN-NEXT:    vmsne.vi v16, v12, 0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT:    vfncvt.rtz.x.f.w v8, v24
+; ZVFHMIN-NEXT:    vmsne.vi v4, v12, 0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vfncvt.rtz.x.f.w v8, v16
 ; ZVFHMIN-NEXT:    vand.vi v8, v8, 1
 ; ZVFHMIN-NEXT:    vmsne.vi v0, v8, 0
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslideup.vx v0, v16, a0
+; ZVFHMIN-NEXT:    vslideup.vx v0, v4, a0
 ; ZVFHMIN-NEXT:    ret
   %evec = fptosi <vscale x 32 x half> %va to <vscale x 32 x i1>
   ret <vscale x 32 x i1> %evec
@@ -962,13 +962,13 @@ define <vscale x 32 x i1> @vfptoui_nxv32f16_nxv32i1(<vscale x 32 x half> %va) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfncvt.rtz.xu.f.w v12, v16
 ; ZVFHMIN-NEXT:    vand.vi v12, v12, 1
-; ZVFHMIN-NEXT:    vmsne.vi v16, v12, 0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT:    vfncvt.rtz.xu.f.w v8, v24
+; ZVFHMIN-NEXT:    vmsne.vi v4, v12, 0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vfncvt.rtz.xu.f.w v8, v16
 ; ZVFHMIN-NEXT:    vand.vi v8, v8, 1
 ; ZVFHMIN-NEXT:    vmsne.vi v0, v8, 0
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslideup.vx v0, v16, a0
+; ZVFHMIN-NEXT:    vslideup.vx v0, v4, a0
 ; ZVFHMIN-NEXT:    ret
   %evec = fptoui <vscale x 32 x half> %va to <vscale x 32 x i1>
   ret <vscale x 32 x i1> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll
index e5048eaf9d0c23..9fc87b58337119 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll
@@ -76,9 +76,8 @@ define <vscale x 2 x i1> @vfptosi_nxv2i1_nxv2f64(<vscale x 2 x double> %va, <vsc
 ; CHECK-LABEL: vfptosi_nxv2i1_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v10, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i1> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll
index 4b609d07c1e7ae..f9ffcdb94907b4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll
@@ -76,9 +76,8 @@ define <vscale x 2 x i1> @vfptoui_nxv2i1_nxv2f64(<vscale x 2 x double> %va, <vsc
 ; CHECK-LABEL: vfptoui_nxv2i1_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v10, v8, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v10, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i1> @llvm.vp.fptoui.nxv2i1.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i1> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll
index ffeb399291e1f9..db568dd0f8d84f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll
@@ -191,11 +191,11 @@ define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmfeq.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfeq.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmfeq.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f16(
@@ -243,9 +243,9 @@ define <vscale x 16 x i1> @intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16(<vscale x 1
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v20, v8, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmfeq.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -399,11 +399,11 @@ define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmfeq.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfeq.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmfeq.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f32(
@@ -451,9 +451,9 @@ define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v20, v8, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmfeq.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -555,11 +555,11 @@ define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmfeq.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfeq.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmfeq.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f64(
@@ -607,9 +607,9 @@ define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v20, v8, v12
+; CHECK-NEXT:    vmfeq.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmfeq.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -799,11 +799,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8f16.f16(
 define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vf_nxv8f16_f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfeq.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmfeq.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8f16.f16(
@@ -846,11 +846,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16f16.f16(
 define <vscale x 16 x i1> @intrinsic_vmfeq_mask_vf_nxv16f16_f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16f16.f16(
@@ -987,11 +987,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f32.f32(
 define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vf_nxv4f32_f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfeq.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmfeq.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f32.f32(
@@ -1034,11 +1034,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8f32.f32(
 define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vf_nxv8f32_f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8f32.f32(
@@ -1128,11 +1128,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfeq.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmfeq.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2f64.f64(
@@ -1175,11 +1175,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfge.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge.ll
index 993b50a1c81ce8..73e47dae0e5743 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfge.ll
@@ -191,11 +191,11 @@ define <vscale x 8 x i1> @intrinsic_vmfge_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vmfle.vv v14, v10, v8
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmfle.vv v8, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfle.vv v2, v10, v8
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmfle.vv v4, v12, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8f16(
@@ -243,9 +243,9 @@ define <vscale x 16 x i1> @intrinsic_vmfge_mask_vv_nxv16f16_nxv16f16(<vscale x 1
 ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmfle.vv v20, v12, v8
+; CHECK-NEXT:    vmfle.vv v4, v12, v8
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmfle.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -399,11 +399,11 @@ define <vscale x 4 x i1> @intrinsic_vmfge_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vmfle.vv v14, v10, v8
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmfle.vv v8, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfle.vv v2, v10, v8
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmfle.vv v4, v12, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f32(
@@ -451,9 +451,9 @@ define <vscale x 8 x i1> @intrinsic_vmfge_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vmfle.vv v20, v12, v8
+; CHECK-NEXT:    vmfle.vv v4, v12, v8
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmfle.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -555,11 +555,11 @@ define <vscale x 2 x i1> @intrinsic_vmfge_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vmfle.vv v14, v10, v8
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmfle.vv v8, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfle.vv v2, v10, v8
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmfle.vv v4, v12, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2f64(
@@ -607,9 +607,9 @@ define <vscale x 4 x i1> @intrinsic_vmfge_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vmfle.vv v20, v12, v8
+; CHECK-NEXT:    vmfle.vv v4, v12, v8
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmfle.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -799,11 +799,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8f16.f16(
 define <vscale x 8 x i1> @intrinsic_vmfge_mask_vf_nxv8f16_f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmfge.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8f16.f16(
@@ -846,11 +846,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16f16.f16(
 define <vscale x 16 x i1> @intrinsic_vmfge_mask_vf_nxv16f16_f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfge.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16f16.f16(
@@ -987,11 +987,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f32.f32(
 define <vscale x 4 x i1> @intrinsic_vmfge_mask_vf_nxv4f32_f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmfge.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f32.f32(
@@ -1034,11 +1034,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8f32.f32(
 define <vscale x 8 x i1> @intrinsic_vmfge_mask_vf_nxv8f32_f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfge.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8f32.f32(
@@ -1128,11 +1128,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmfge_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmfge.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2f64.f64(
@@ -1175,11 +1175,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmfge_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfge.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll
index 427f0eb28e7df5..802981f44dac1e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll
@@ -191,11 +191,11 @@ define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vmflt.vv v14, v10, v8
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmflt.vv v8, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmflt.vv v2, v10, v8
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmflt.vv v4, v12, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8f16(
@@ -243,9 +243,9 @@ define <vscale x 16 x i1> @intrinsic_vmfgt_mask_vv_nxv16f16_nxv16f16(<vscale x 1
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmflt.vv v20, v12, v8
+; CHECK-NEXT:    vmflt.vv v4, v12, v8
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmflt.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -399,11 +399,11 @@ define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vv v14, v10, v8
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmflt.vv v8, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmflt.vv v2, v10, v8
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmflt.vv v4, v12, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f32(
@@ -451,9 +451,9 @@ define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vv v20, v12, v8
+; CHECK-NEXT:    vmflt.vv v4, v12, v8
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmflt.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -555,11 +555,11 @@ define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vv v14, v10, v8
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmflt.vv v8, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmflt.vv v2, v10, v8
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmflt.vv v4, v12, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f64(
@@ -607,9 +607,9 @@ define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vv v20, v12, v8
+; CHECK-NEXT:    vmflt.vv v4, v12, v8
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmflt.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -799,11 +799,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8f16.f16(
 define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vf_nxv8f16_f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8f16.f16(
@@ -846,11 +846,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16f16.f16(
 define <vscale x 16 x i1> @intrinsic_vmfgt_mask_vf_nxv16f16_f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16f16.f16(
@@ -987,11 +987,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f32.f32(
 define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vf_nxv4f32_f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f32.f32(
@@ -1034,11 +1034,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8f32.f32(
 define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vf_nxv8f32_f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8f32.f32(
@@ -1128,11 +1128,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmfgt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2f64.f64(
@@ -1175,11 +1175,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfgt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfle.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle.ll
index e5327632fc04f6..aa04ca561a6f2e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfle.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfle.ll
@@ -191,11 +191,11 @@ define <vscale x 8 x i1> @intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vmfle.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmfle.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfle.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmfle.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8f16(
@@ -243,9 +243,9 @@ define <vscale x 16 x i1> @intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16(<vscale x 1
 ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmfle.vv v20, v8, v12
+; CHECK-NEXT:    vmfle.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmfle.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -399,11 +399,11 @@ define <vscale x 4 x i1> @intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vmfle.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmfle.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfle.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmfle.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f32(
@@ -451,9 +451,9 @@ define <vscale x 8 x i1> @intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vmfle.vv v20, v8, v12
+; CHECK-NEXT:    vmfle.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmfle.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -555,11 +555,11 @@ define <vscale x 2 x i1> @intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vmfle.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmfle.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfle.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmfle.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2f64(
@@ -607,9 +607,9 @@ define <vscale x 4 x i1> @intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vmfle.vv v20, v8, v12
+; CHECK-NEXT:    vmfle.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmfle.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -799,11 +799,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8f16.f16(
 define <vscale x 8 x i1> @intrinsic_vmfle_mask_vf_nxv8f16_f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmfle.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8f16.f16(
@@ -846,11 +846,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16f16.f16(
 define <vscale x 16 x i1> @intrinsic_vmfle_mask_vf_nxv16f16_f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfle.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16f16.f16(
@@ -987,11 +987,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4f32.f32(
 define <vscale x 4 x i1> @intrinsic_vmfle_mask_vf_nxv4f32_f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmfle.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4f32.f32(
@@ -1034,11 +1034,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8f32.f32(
 define <vscale x 8 x i1> @intrinsic_vmfle_mask_vf_nxv8f32_f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfle.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8f32.f32(
@@ -1128,11 +1128,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmfle_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmfle.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2f64.f64(
@@ -1175,11 +1175,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmfle_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfle.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmflt.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt.ll
index 64f257e355ceae..11ea0309c07bdd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmflt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmflt.ll
@@ -191,11 +191,11 @@ define <vscale x 8 x i1> @intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vmflt.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmflt.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmflt.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmflt.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8f16(
@@ -243,9 +243,9 @@ define <vscale x 16 x i1> @intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16(<vscale x 1
 ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmflt.vv v20, v8, v12
+; CHECK-NEXT:    vmflt.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmflt.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -399,11 +399,11 @@ define <vscale x 4 x i1> @intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmflt.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmflt.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmflt.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f32(
@@ -451,9 +451,9 @@ define <vscale x 8 x i1> @intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vv v20, v8, v12
+; CHECK-NEXT:    vmflt.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmflt.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -555,11 +555,11 @@ define <vscale x 2 x i1> @intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmflt.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmflt.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmflt.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2f64(
@@ -607,9 +607,9 @@ define <vscale x 4 x i1> @intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vv v20, v8, v12
+; CHECK-NEXT:    vmflt.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmflt.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -799,11 +799,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8f16.f16(
 define <vscale x 8 x i1> @intrinsic_vmflt_mask_vf_nxv8f16_f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8f16.f16(
@@ -846,11 +846,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16f16.f16(
 define <vscale x 16 x i1> @intrinsic_vmflt_mask_vf_nxv16f16_f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16f16.f16(
@@ -987,11 +987,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f32.f32(
 define <vscale x 4 x i1> @intrinsic_vmflt_mask_vf_nxv4f32_f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f32.f32(
@@ -1034,11 +1034,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8f32.f32(
 define <vscale x 8 x i1> @intrinsic_vmflt_mask_vf_nxv8f32_f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8f32.f32(
@@ -1128,11 +1128,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmflt_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmflt.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2f64.f64(
@@ -1175,11 +1175,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmflt_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmflt.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfne.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne.ll
index 6f6a2a5e8783c6..d0b64fa7d4f9f5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfne.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfne.ll
@@ -191,11 +191,11 @@ define <vscale x 8 x i1> @intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vmfne.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmfne.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfne.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmfne.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8f16(
@@ -243,9 +243,9 @@ define <vscale x 16 x i1> @intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16(<vscale x 1
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmfne.vv v20, v8, v12
+; CHECK-NEXT:    vmfne.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmfne.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -399,11 +399,11 @@ define <vscale x 4 x i1> @intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vmfne.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmfne.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfne.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmfne.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f32(
@@ -451,9 +451,9 @@ define <vscale x 8 x i1> @intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vmfne.vv v20, v8, v12
+; CHECK-NEXT:    vmfne.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmfne.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -555,11 +555,11 @@ define <vscale x 2 x i1> @intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vmfne.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmfne.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfne.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmfne.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2f64(
@@ -607,9 +607,9 @@ define <vscale x 4 x i1> @intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vmfne.vv v20, v8, v12
+; CHECK-NEXT:    vmfne.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmfne.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -799,11 +799,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8f16.f16(
 define <vscale x 8 x i1> @intrinsic_vmfne_mask_vf_nxv8f16_f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfne.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmfne.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8f16.f16(
@@ -846,11 +846,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16f16.f16(
 define <vscale x 16 x i1> @intrinsic_vmfne_mask_vf_nxv16f16_f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfne.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfne.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16f16.f16(
@@ -987,11 +987,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f32.f32(
 define <vscale x 4 x i1> @intrinsic_vmfne_mask_vf_nxv4f32_f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfne.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmfne.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f32.f32(
@@ -1034,11 +1034,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8f32.f32(
 define <vscale x 8 x i1> @intrinsic_vmfne_mask_vf_nxv8f32_f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfne.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfne.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8f32.f32(
@@ -1128,11 +1128,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmfne_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfne.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmfne.vf v2, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2f64.f64(
@@ -1175,11 +1175,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmfne_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfne.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfne.vf v4, v8, fa0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmseq.ll b/llvm/test/CodeGen/RISCV/rvv/vmseq.ll
index da1c751b566304..bc3fbcbb4cb436 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmseq.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmseq.ll
@@ -243,11 +243,11 @@ define <vscale x 16 x i1> @intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8(<vscale x 16
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmseq.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmseq.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmseq.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmseq.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i8(
@@ -295,9 +295,9 @@ define <vscale x 32 x i1> @intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8(<vscale x 32
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmseq.vv v20, v8, v12
+; CHECK-NEXT:    vmseq.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmseq.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -503,11 +503,11 @@ define <vscale x 8 x i1> @intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vmseq.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmseq.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmseq.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmseq.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i16(
@@ -555,9 +555,9 @@ define <vscale x 16 x i1> @intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16(<vscale x 1
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmseq.vv v20, v8, v12
+; CHECK-NEXT:    vmseq.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmseq.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -711,11 +711,11 @@ define <vscale x 4 x i1> @intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vmseq.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmseq.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmseq.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmseq.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32(
@@ -763,9 +763,9 @@ define <vscale x 8 x i1> @intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vmseq.vv v20, v8, v12
+; CHECK-NEXT:    vmseq.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmseq.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -867,11 +867,11 @@ define <vscale x 2 x i1> @intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vmseq.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmseq.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmseq.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmseq.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i64(
@@ -919,9 +919,9 @@ define <vscale x 4 x i1> @intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vmseq.vv v20, v8, v12
+; CHECK-NEXT:    vmseq.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmseq.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -1158,11 +1158,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i8.i8(
 define <vscale x 16 x i1> @intrinsic_vmseq_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmseq.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmseq.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i8.i8(
@@ -1205,11 +1205,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmseq.mask.nxv32i8.i8(
 define <vscale x 32 x i1> @intrinsic_vmseq_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmseq.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmseq.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmseq.mask.nxv32i8.i8(
@@ -1393,11 +1393,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i16.i16(
 define <vscale x 8 x i1> @intrinsic_vmseq_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmseq.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmseq.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i16.i16(
@@ -1440,11 +1440,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i16.i16(
 define <vscale x 16 x i1> @intrinsic_vmseq_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmseq.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmseq.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i16.i16(
@@ -1581,11 +1581,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32.i32(
 define <vscale x 4 x i1> @intrinsic_vmseq_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmseq.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmseq.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32.i32(
@@ -1628,11 +1628,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32.i32(
 define <vscale x 8 x i1> @intrinsic_vmseq_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmseq.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmseq.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32.i32(
@@ -1767,20 +1767,20 @@ define <vscale x 2 x i1> @intrinsic_vmseq_mask_vx_nxv2i64_i64(<vscale x 2 x i1>
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
 ; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmv1r.v v11, v0
+; RV32-NEXT:    vmv1r.v v2, v0
 ; RV32-NEXT:    vmv1r.v v0, v10
-; RV32-NEXT:    vmseq.vv v11, v8, v12, v0.t
-; RV32-NEXT:    vmv1r.v v0, v11
+; RV32-NEXT:    vmseq.vv v2, v8, v12, v0.t
+; RV32-NEXT:    vmv1r.v v0, v2
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmseq_mask_vx_nxv2i64_i64:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    vmv1r.v v11, v0
+; RV64-NEXT:    vmv1r.v v2, v0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
 ; RV64-NEXT:    vmv1r.v v0, v10
-; RV64-NEXT:    vmseq.vx v11, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v11
+; RV64-NEXT:    vmseq.vx v2, v8, a0, v0.t
+; RV64-NEXT:    vmv1r.v v0, v2
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i64.i64(
@@ -1841,20 +1841,20 @@ define <vscale x 4 x i1> @intrinsic_vmseq_mask_vx_nxv4i64_i64(<vscale x 4 x i1>
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmv1r.v v13, v0
+; RV32-NEXT:    vmv1r.v v4, v0
 ; RV32-NEXT:    vmv1r.v v0, v12
-; RV32-NEXT:    vmseq.vv v13, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v13
+; RV32-NEXT:    vmseq.vv v4, v8, v16, v0.t
+; RV32-NEXT:    vmv1r.v v0, v4
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmseq_mask_vx_nxv4i64_i64:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    vmv1r.v v13, v0
+; RV64-NEXT:    vmv1r.v v4, v0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT:    vmv1r.v v0, v12
-; RV64-NEXT:    vmseq.vx v13, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v13
+; RV64-NEXT:    vmseq.vx v4, v8, a0, v0.t
+; RV64-NEXT:    vmv1r.v v0, v4
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i64.i64(
@@ -2025,11 +2025,11 @@ entry:
 define <vscale x 16 x i1> @intrinsic_vmseq_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmseq.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmseq.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i8.i8(
@@ -2060,11 +2060,11 @@ entry:
 define <vscale x 32 x i1> @intrinsic_vmseq_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmseq.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmseq.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmseq.mask.nxv32i8.i8(
@@ -2200,11 +2200,11 @@ entry:
 define <vscale x 8 x i1> @intrinsic_vmseq_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmseq.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmseq.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i16.i16(
@@ -2235,11 +2235,11 @@ entry:
 define <vscale x 16 x i1> @intrinsic_vmseq_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmseq.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmseq.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i16.i16(
@@ -2340,11 +2340,11 @@ entry:
 define <vscale x 4 x i1> @intrinsic_vmseq_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmseq.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmseq.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32.i32(
@@ -2375,11 +2375,11 @@ entry:
 define <vscale x 8 x i1> @intrinsic_vmseq_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmseq.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmseq.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32.i32(
@@ -2445,11 +2445,11 @@ entry:
 define <vscale x 2 x i1> @intrinsic_vmseq_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmseq.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmseq.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i64.i64(
@@ -2480,11 +2480,11 @@ entry:
 define <vscale x 4 x i1> @intrinsic_vmseq_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmseq.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmseq.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
index 502fb9b24148f7..2ed626c7751e5c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
@@ -243,11 +243,11 @@ define <vscale x 16 x i1> @intrinsic_vmsge_mask_vv_nxv16i8_nxv16i8(<vscale x 16
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmsle.vv v14, v10, v8
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsle.vv v8, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsle.vv v2, v10, v8
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsle.vv v4, v12, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i8(
@@ -295,9 +295,9 @@ define <vscale x 32 x i1> @intrinsic_vmsge_mask_vv_nxv32i8_nxv32i8(<vscale x 32
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmsle.vv v20, v12, v8
+; CHECK-NEXT:    vmsle.vv v4, v12, v8
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsle.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -503,11 +503,11 @@ define <vscale x 8 x i1> @intrinsic_vmsge_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vmsle.vv v14, v10, v8
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsle.vv v8, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsle.vv v2, v10, v8
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsle.vv v4, v12, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i16(
@@ -555,9 +555,9 @@ define <vscale x 16 x i1> @intrinsic_vmsge_mask_vv_nxv16i16_nxv16i16(<vscale x 1
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmsle.vv v20, v12, v8
+; CHECK-NEXT:    vmsle.vv v4, v12, v8
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsle.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -711,11 +711,11 @@ define <vscale x 4 x i1> @intrinsic_vmsge_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vmsle.vv v14, v10, v8
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsle.vv v8, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsle.vv v2, v10, v8
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsle.vv v4, v12, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i32(
@@ -763,9 +763,9 @@ define <vscale x 8 x i1> @intrinsic_vmsge_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vmsle.vv v20, v12, v8
+; CHECK-NEXT:    vmsle.vv v4, v12, v8
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsle.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -867,11 +867,11 @@ define <vscale x 2 x i1> @intrinsic_vmsge_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vmsle.vv v14, v10, v8
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsle.vv v8, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsle.vv v2, v10, v8
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsle.vv v4, v12, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i64(
@@ -919,9 +919,9 @@ define <vscale x 4 x i1> @intrinsic_vmsge_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vmsle.vv v20, v12, v8
+; CHECK-NEXT:    vmsle.vv v4, v12, v8
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsle.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -1141,8 +1141,8 @@ define <vscale x 16 x i1> @intrinsic_vmsge_vx_nxv16i8_i8(<vscale x 16 x i8> %0,
 ; CHECK-LABEL: intrinsic_vmsge_vx_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vmslt.vx v10, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i8.i8(
@@ -1163,11 +1163,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i8.i8(
 define <vscale x 16 x i1> @intrinsic_vmsge_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmslt.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmxor.mm v0, v11, v10
+; CHECK-NEXT:    vmslt.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmxor.mm v0, v2, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i8.i8(
@@ -1189,8 +1189,8 @@ define <vscale x 32 x i1> @intrinsic_vmsge_vx_nxv32i8_i8(<vscale x 32 x i8> %0,
 ; CHECK-LABEL: intrinsic_vmsge_vx_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vmslt.vx v12, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsge.nxv32i8.i8(
@@ -1211,11 +1211,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsge.mask.nxv32i8.i8(
 define <vscale x 32 x i1> @intrinsic_vmsge_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmslt.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmxor.mm v0, v13, v12
+; CHECK-NEXT:    vmslt.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmxor.mm v0, v4, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsge.mask.nxv32i8.i8(
@@ -1381,8 +1381,8 @@ define <vscale x 8 x i1> @intrinsic_vmsge_vx_nxv8i16_i16(<vscale x 8 x i16> %0,
 ; CHECK-LABEL: intrinsic_vmsge_vx_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vmslt.vx v10, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i16.i16(
@@ -1403,11 +1403,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i16.i16(
 define <vscale x 8 x i1> @intrinsic_vmsge_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmslt.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmxor.mm v0, v11, v10
+; CHECK-NEXT:    vmslt.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmxor.mm v0, v2, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i16.i16(
@@ -1429,8 +1429,8 @@ define <vscale x 16 x i1> @intrinsic_vmsge_vx_nxv16i16_i16(<vscale x 16 x i16> %
 ; CHECK-LABEL: intrinsic_vmsge_vx_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vmslt.vx v12, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i16.i16(
@@ -1451,11 +1451,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i16.i16(
 define <vscale x 16 x i1> @intrinsic_vmsge_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmslt.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmxor.mm v0, v13, v12
+; CHECK-NEXT:    vmslt.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmxor.mm v0, v4, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i16.i16(
@@ -1573,8 +1573,8 @@ define <vscale x 4 x i1> @intrinsic_vmsge_vx_nxv4i32_i32(<vscale x 4 x i32> %0,
 ; CHECK-LABEL: intrinsic_vmsge_vx_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmslt.vx v10, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i32.i32(
@@ -1595,11 +1595,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i32.i32(
 define <vscale x 4 x i1> @intrinsic_vmsge_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmslt.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmxor.mm v0, v11, v10
+; CHECK-NEXT:    vmslt.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmxor.mm v0, v2, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i32.i32(
@@ -1621,8 +1621,8 @@ define <vscale x 8 x i1> @intrinsic_vmsge_vx_nxv8i32_i32(<vscale x 8 x i32> %0,
 ; CHECK-LABEL: intrinsic_vmsge_vx_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmslt.vx v12, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i32.i32(
@@ -1643,11 +1643,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i32.i32(
 define <vscale x 8 x i1> @intrinsic_vmsge_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmslt.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmxor.mm v0, v13, v12
+; CHECK-NEXT:    vmslt.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmxor.mm v0, v4, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i32.i32(
@@ -1756,8 +1756,8 @@ define <vscale x 2 x i1> @intrinsic_vmsge_vx_nxv2i64_i64(<vscale x 2 x i64> %0,
 ; RV64-LABEL: intrinsic_vmsge_vx_nxv2i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; RV64-NEXT:    vmslt.vx v10, v8, a0
-; RV64-NEXT:    vmnot.m v0, v10
+; RV64-NEXT:    vmslt.vx v0, v8, a0
+; RV64-NEXT:    vmnot.m v0, v0
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i64.i64(
@@ -1784,20 +1784,20 @@ define <vscale x 2 x i1> @intrinsic_vmsge_mask_vx_nxv2i64_i64(<vscale x 2 x i1>
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
 ; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmv1r.v v11, v0
+; RV32-NEXT:    vmv1r.v v2, v0
 ; RV32-NEXT:    vmv1r.v v0, v10
-; RV32-NEXT:    vmsle.vv v11, v12, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v11
+; RV32-NEXT:    vmsle.vv v2, v12, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v2
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsge_mask_vx_nxv2i64_i64:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    vmv1r.v v11, v0
+; RV64-NEXT:    vmv1r.v v2, v0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
 ; RV64-NEXT:    vmv1r.v v0, v10
-; RV64-NEXT:    vmslt.vx v11, v8, a0, v0.t
-; RV64-NEXT:    vmxor.mm v0, v11, v10
+; RV64-NEXT:    vmslt.vx v2, v8, a0, v0.t
+; RV64-NEXT:    vmxor.mm v0, v2, v10
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i64.i64(
@@ -1831,8 +1831,8 @@ define <vscale x 4 x i1> @intrinsic_vmsge_vx_nxv4i64_i64(<vscale x 4 x i64> %0,
 ; RV64-LABEL: intrinsic_vmsge_vx_nxv4i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmslt.vx v12, v8, a0
-; RV64-NEXT:    vmnot.m v0, v12
+; RV64-NEXT:    vmslt.vx v0, v8, a0
+; RV64-NEXT:    vmnot.m v0, v0
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i64.i64(
@@ -1859,20 +1859,20 @@ define <vscale x 4 x i1> @intrinsic_vmsge_mask_vx_nxv4i64_i64(<vscale x 4 x i1>
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmv1r.v v13, v0
+; RV32-NEXT:    vmv1r.v v4, v0
 ; RV32-NEXT:    vmv1r.v v0, v12
-; RV32-NEXT:    vmsle.vv v13, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v13
+; RV32-NEXT:    vmsle.vv v4, v16, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v4
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsge_mask_vx_nxv4i64_i64:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    vmv1r.v v13, v0
+; RV64-NEXT:    vmv1r.v v4, v0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT:    vmv1r.v v0, v12
-; RV64-NEXT:    vmslt.vx v13, v8, a0, v0.t
-; RV64-NEXT:    vmxor.mm v0, v13, v12
+; RV64-NEXT:    vmslt.vx v4, v8, a0, v0.t
+; RV64-NEXT:    vmxor.mm v0, v4, v12
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i64.i64(
@@ -2043,11 +2043,11 @@ entry:
 define <vscale x 16 x i1> @intrinsic_vmsge_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsgt.vi v11, v8, -7, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsgt.vi v2, v8, -7, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i8.i8(
@@ -2078,11 +2078,11 @@ entry:
 define <vscale x 32 x i1> @intrinsic_vmsge_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsgt.vi v13, v8, -5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsgt.vi v4, v8, -5, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsge.mask.nxv32i8.i8(
@@ -2218,11 +2218,11 @@ entry:
 define <vscale x 8 x i1> @intrinsic_vmsge_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsgt.vi v11, v8, 2, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsgt.vi v2, v8, 2, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i16.i16(
@@ -2253,11 +2253,11 @@ entry:
 define <vscale x 16 x i1> @intrinsic_vmsge_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsgt.vi v13, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsgt.vi v4, v8, 4, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i16.i16(
@@ -2358,11 +2358,11 @@ entry:
 define <vscale x 4 x i1> @intrinsic_vmsge_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsgt.vi v11, v8, 10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsgt.vi v2, v8, 10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i32.i32(
@@ -2393,11 +2393,11 @@ entry:
 define <vscale x 8 x i1> @intrinsic_vmsge_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsgt.vi v13, v8, 12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsgt.vi v4, v8, 12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i32.i32(
@@ -2463,11 +2463,11 @@ entry:
 define <vscale x 2 x i1> @intrinsic_vmsge_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsgt.vi v11, v8, 8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsgt.vi v2, v8, 8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i64.i64(
@@ -2498,11 +2498,11 @@ entry:
 define <vscale x 4 x i1> @intrinsic_vmsge_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsgt.vi v13, v8, 8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsgt.vi v4, v8, 8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i64.i64(
@@ -2592,8 +2592,8 @@ define <vscale x 16 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv16i8_i8(<vscale
 ; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vmslt.vx v10, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v10
+; CHECK-NEXT:    vmslt.vx v2, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i8.i8(
@@ -2610,8 +2610,8 @@ define <vscale x 32 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv32i8_i8(<vscale
 ; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vmslt.vx v12, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v12
+; CHECK-NEXT:    vmslt.vx v4, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsge.mask.nxv32i8.i8(
@@ -2682,8 +2682,8 @@ define <vscale x 8 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv8i16_i16(<vscale
 ; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vmslt.vx v10, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v10
+; CHECK-NEXT:    vmslt.vx v2, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i16.i16(
@@ -2700,8 +2700,8 @@ define <vscale x 16 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv16i16_i16(<vscal
 ; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vmslt.vx v12, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v12
+; CHECK-NEXT:    vmslt.vx v4, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i16.i16(
@@ -2754,8 +2754,8 @@ define <vscale x 4 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv4i32_i32(<vscale
 ; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmslt.vx v10, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v10
+; CHECK-NEXT:    vmslt.vx v2, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i32.i32(
@@ -2772,8 +2772,8 @@ define <vscale x 8 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv8i32_i32(<vscale
 ; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmslt.vx v12, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v12
+; CHECK-NEXT:    vmslt.vx v4, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i32.i32(
@@ -2824,18 +2824,16 @@ define <vscale x 2 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv2i64_i64(<vscale
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmv1r.v v10, v0
-; RV32-NEXT:    vmsle.vv v10, v12, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v10
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vmsle.vv v0, v10, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv2i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; RV64-NEXT:    vmslt.vx v10, v8, a0
-; RV64-NEXT:    vmandn.mm v0, v0, v10
+; RV64-NEXT:    vmslt.vx v2, v8, a0
+; RV64-NEXT:    vmandn.mm v0, v0, v2
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i64.i64(
@@ -2856,18 +2854,16 @@ define <vscale x 4 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv4i64_i64(<vscale
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmv1r.v v12, v0
-; RV32-NEXT:    vmsle.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vmsle.vv v0, v12, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv4i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmslt.vx v12, v8, a0
-; RV64-NEXT:    vmandn.mm v0, v0, v12
+; RV64-NEXT:    vmslt.vx v4, v8, a0
+; RV64-NEXT:    vmandn.mm v0, v0, v4
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
index 9410a99d81423f..0ce7dae12a1e4f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
@@ -243,11 +243,11 @@ define <vscale x 16 x i1> @intrinsic_vmsgeu_mask_vv_nxv16i8_nxv16i8(<vscale x 16
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmsleu.vv v14, v10, v8
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsleu.vv v8, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsleu.vv v2, v10, v8
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsleu.vv v4, v12, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.nxv16i8(
@@ -295,9 +295,9 @@ define <vscale x 32 x i1> @intrinsic_vmsgeu_mask_vv_nxv32i8_nxv32i8(<vscale x 32
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmsleu.vv v20, v12, v8
+; CHECK-NEXT:    vmsleu.vv v4, v12, v8
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsleu.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -503,11 +503,11 @@ define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vmsleu.vv v14, v10, v8
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsleu.vv v8, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsleu.vv v2, v10, v8
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsleu.vv v4, v12, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.nxv8i16(
@@ -555,9 +555,9 @@ define <vscale x 16 x i1> @intrinsic_vmsgeu_mask_vv_nxv16i16_nxv16i16(<vscale x
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmsleu.vv v20, v12, v8
+; CHECK-NEXT:    vmsleu.vv v4, v12, v8
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsleu.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -711,11 +711,11 @@ define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vmsleu.vv v14, v10, v8
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsleu.vv v8, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsleu.vv v2, v10, v8
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsleu.vv v4, v12, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.nxv4i32(
@@ -763,9 +763,9 @@ define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vmsleu.vv v20, v12, v8
+; CHECK-NEXT:    vmsleu.vv v4, v12, v8
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsleu.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -867,11 +867,11 @@ define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vmsleu.vv v14, v10, v8
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsleu.vv v8, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsleu.vv v2, v10, v8
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsleu.vv v4, v12, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 2 x i1> @llvm.riscv.vmsgeu.nxv2i64(
@@ -919,9 +919,9 @@ define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vmsleu.vv v20, v12, v8
+; CHECK-NEXT:    vmsleu.vv v4, v12, v8
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsleu.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -1141,8 +1141,8 @@ define <vscale x 16 x i1> @intrinsic_vmsgeu_vx_nxv16i8_i8(<vscale x 16 x i8> %0,
 ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vmsltu.vx v10, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmsltu.vx v0, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.nxv16i8.i8(
@@ -1163,11 +1163,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i8.i8(
 define <vscale x 16 x i1> @intrinsic_vmsgeu_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsltu.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmxor.mm v0, v11, v10
+; CHECK-NEXT:    vmsltu.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmxor.mm v0, v2, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i8.i8(
@@ -1189,8 +1189,8 @@ define <vscale x 32 x i1> @intrinsic_vmsgeu_vx_nxv32i8_i8(<vscale x 32 x i8> %0,
 ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vx v12, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmsltu.vx v0, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsgeu.nxv32i8.i8(
@@ -1211,11 +1211,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsgeu.mask.nxv32i8.i8(
 define <vscale x 32 x i1> @intrinsic_vmsgeu_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsltu.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmxor.mm v0, v13, v12
+; CHECK-NEXT:    vmsltu.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmxor.mm v0, v4, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsgeu.mask.nxv32i8.i8(
@@ -1381,8 +1381,8 @@ define <vscale x 8 x i1> @intrinsic_vmsgeu_vx_nxv8i16_i16(<vscale x 8 x i16> %0,
 ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vmsltu.vx v10, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmsltu.vx v0, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.nxv8i16.i16(
@@ -1403,11 +1403,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i16.i16(
 define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsltu.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmxor.mm v0, v11, v10
+; CHECK-NEXT:    vmsltu.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmxor.mm v0, v2, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i16.i16(
@@ -1429,8 +1429,8 @@ define <vscale x 16 x i1> @intrinsic_vmsgeu_vx_nxv16i16_i16(<vscale x 16 x i16>
 ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vx v12, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmsltu.vx v0, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.nxv16i16.i16(
@@ -1451,11 +1451,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i16.i16(
 define <vscale x 16 x i1> @intrinsic_vmsgeu_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsltu.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmxor.mm v0, v13, v12
+; CHECK-NEXT:    vmsltu.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmxor.mm v0, v4, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i16.i16(
@@ -1573,8 +1573,8 @@ define <vscale x 4 x i1> @intrinsic_vmsgeu_vx_nxv4i32_i32(<vscale x 4 x i32> %0,
 ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsltu.vx v10, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmsltu.vx v0, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.nxv4i32.i32(
@@ -1595,11 +1595,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i32.i32(
 define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsltu.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmxor.mm v0, v11, v10
+; CHECK-NEXT:    vmsltu.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmxor.mm v0, v2, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i32.i32(
@@ -1621,8 +1621,8 @@ define <vscale x 8 x i1> @intrinsic_vmsgeu_vx_nxv8i32_i32(<vscale x 8 x i32> %0,
 ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vx v12, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmsltu.vx v0, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.nxv8i32.i32(
@@ -1643,11 +1643,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i32.i32(
 define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsltu.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmxor.mm v0, v13, v12
+; CHECK-NEXT:    vmsltu.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmxor.mm v0, v4, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i32.i32(
@@ -1756,8 +1756,8 @@ define <vscale x 2 x i1> @intrinsic_vmsgeu_vx_nxv2i64_i64(<vscale x 2 x i64> %0,
 ; RV64-LABEL: intrinsic_vmsgeu_vx_nxv2i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; RV64-NEXT:    vmsltu.vx v10, v8, a0
-; RV64-NEXT:    vmnot.m v0, v10
+; RV64-NEXT:    vmsltu.vx v0, v8, a0
+; RV64-NEXT:    vmnot.m v0, v0
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsgeu.nxv2i64.i64(
@@ -1784,20 +1784,20 @@ define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vx_nxv2i64_i64(<vscale x 2 x i1>
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
 ; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmv1r.v v11, v0
+; RV32-NEXT:    vmv1r.v v2, v0
 ; RV32-NEXT:    vmv1r.v v0, v10
-; RV32-NEXT:    vmsleu.vv v11, v12, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v11
+; RV32-NEXT:    vmsleu.vv v2, v12, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v2
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsgeu_mask_vx_nxv2i64_i64:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    vmv1r.v v11, v0
+; RV64-NEXT:    vmv1r.v v2, v0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
 ; RV64-NEXT:    vmv1r.v v0, v10
-; RV64-NEXT:    vmsltu.vx v11, v8, a0, v0.t
-; RV64-NEXT:    vmxor.mm v0, v11, v10
+; RV64-NEXT:    vmsltu.vx v2, v8, a0, v0.t
+; RV64-NEXT:    vmxor.mm v0, v2, v10
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i64.i64(
@@ -1831,8 +1831,8 @@ define <vscale x 4 x i1> @intrinsic_vmsgeu_vx_nxv4i64_i64(<vscale x 4 x i64> %0,
 ; RV64-LABEL: intrinsic_vmsgeu_vx_nxv4i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsltu.vx v12, v8, a0
-; RV64-NEXT:    vmnot.m v0, v12
+; RV64-NEXT:    vmsltu.vx v0, v8, a0
+; RV64-NEXT:    vmnot.m v0, v0
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.nxv4i64.i64(
@@ -1859,20 +1859,20 @@ define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vx_nxv4i64_i64(<vscale x 4 x i1>
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmv1r.v v13, v0
+; RV32-NEXT:    vmv1r.v v4, v0
 ; RV32-NEXT:    vmv1r.v v0, v12
-; RV32-NEXT:    vmsleu.vv v13, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v13
+; RV32-NEXT:    vmsleu.vv v4, v16, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v4
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsgeu_mask_vx_nxv4i64_i64:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    vmv1r.v v13, v0
+; RV64-NEXT:    vmv1r.v v4, v0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT:    vmv1r.v v0, v12
-; RV64-NEXT:    vmsltu.vx v13, v8, a0, v0.t
-; RV64-NEXT:    vmxor.mm v0, v13, v12
+; RV64-NEXT:    vmsltu.vx v4, v8, a0, v0.t
+; RV64-NEXT:    vmxor.mm v0, v4, v12
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i64.i64(
@@ -2043,11 +2043,11 @@ entry:
 define <vscale x 16 x i1> @intrinsic_vmsgeu_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsgtu.vi v11, v8, -7, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsgtu.vi v2, v8, -7, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i8.i8(
@@ -2078,11 +2078,11 @@ entry:
 define <vscale x 32 x i1> @intrinsic_vmsgeu_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsgtu.vi v13, v8, -5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsgtu.vi v4, v8, -5, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsgeu.mask.nxv32i8.i8(
@@ -2230,11 +2230,11 @@ entry:
 define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsgtu.vi v11, v8, 2, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsgtu.vi v2, v8, 2, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i16.i16(
@@ -2265,11 +2265,11 @@ entry:
 define <vscale x 16 x i1> @intrinsic_vmsgeu_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsgtu.vi v13, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsgtu.vi v4, v8, 4, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i16.i16(
@@ -2370,11 +2370,11 @@ entry:
 define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsgtu.vi v11, v8, 10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsgtu.vi v2, v8, 10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i32.i32(
@@ -2405,11 +2405,11 @@ entry:
 define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsgtu.vi v13, v8, 12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsgtu.vi v4, v8, 12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i32.i32(
@@ -2475,11 +2475,11 @@ entry:
 define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsgtu.vi v11, v8, -16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsgtu.vi v2, v8, -16, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i64.i64(
@@ -2510,11 +2510,11 @@ entry:
 define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsgtu.vi v13, v8, -14, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsgtu.vi v4, v8, -14, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i64.i64(
@@ -2604,8 +2604,8 @@ define <vscale x 16 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv16i8_i8(<vscale
 ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vmsltu.vx v10, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v10
+; CHECK-NEXT:    vmsltu.vx v2, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i8.i8(
@@ -2622,8 +2622,8 @@ define <vscale x 32 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv32i8_i8(<vscale
 ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vx v12, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v12
+; CHECK-NEXT:    vmsltu.vx v4, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsgeu.mask.nxv32i8.i8(
@@ -2694,8 +2694,8 @@ define <vscale x 8 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv8i16_i16(<vscale
 ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vmsltu.vx v10, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v10
+; CHECK-NEXT:    vmsltu.vx v2, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i16.i16(
@@ -2712,8 +2712,8 @@ define <vscale x 16 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv16i16_i16(<vsca
 ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vx v12, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v12
+; CHECK-NEXT:    vmsltu.vx v4, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i16.i16(
@@ -2766,8 +2766,8 @@ define <vscale x 4 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i32_i32(<vscale
 ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsltu.vx v10, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v10
+; CHECK-NEXT:    vmsltu.vx v2, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i32.i32(
@@ -2784,8 +2784,8 @@ define <vscale x 8 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv8i32_i32(<vscale
 ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vx v12, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v12
+; CHECK-NEXT:    vmsltu.vx v4, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i32.i32(
@@ -2836,18 +2836,16 @@ define <vscale x 2 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv2i64_i64(<vscale
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmv1r.v v10, v0
-; RV32-NEXT:    vmsleu.vv v10, v12, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v10
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vmsleu.vv v0, v10, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv2i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; RV64-NEXT:    vmsltu.vx v10, v8, a0
-; RV64-NEXT:    vmandn.mm v0, v0, v10
+; RV64-NEXT:    vmsltu.vx v2, v8, a0
+; RV64-NEXT:    vmandn.mm v0, v0, v2
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i64.i64(
@@ -2868,18 +2866,16 @@ define <vscale x 4 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i64_i64(<vscale
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmv1r.v v12, v0
-; RV32-NEXT:    vmsleu.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vmsleu.vv v0, v12, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsltu.vx v12, v8, a0
-; RV64-NEXT:    vmandn.mm v0, v0, v12
+; RV64-NEXT:    vmsltu.vx v4, v8, a0
+; RV64-NEXT:    vmandn.mm v0, v0, v4
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll
index b7a676e7f2dd37..aed9882de6266f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll
@@ -243,11 +243,11 @@ define <vscale x 16 x i1> @intrinsic_vmsgt_mask_vv_nxv16i8_nxv16i8(<vscale x 16
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmslt.vv v14, v10, v8
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmslt.vv v8, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmslt.vv v2, v10, v8
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmslt.vv v4, v12, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 16 x i1> @llvm.riscv.vmsgt.nxv16i8(
@@ -295,9 +295,9 @@ define <vscale x 32 x i1> @intrinsic_vmsgt_mask_vv_nxv32i8_nxv32i8(<vscale x 32
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmslt.vv v20, v12, v8
+; CHECK-NEXT:    vmslt.vv v4, v12, v8
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmslt.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -503,11 +503,11 @@ define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vmslt.vv v14, v10, v8
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmslt.vv v8, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmslt.vv v2, v10, v8
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmslt.vv v4, v12, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 8 x i1> @llvm.riscv.vmsgt.nxv8i16(
@@ -555,9 +555,9 @@ define <vscale x 16 x i1> @intrinsic_vmsgt_mask_vv_nxv16i16_nxv16i16(<vscale x 1
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmslt.vv v20, v12, v8
+; CHECK-NEXT:    vmslt.vv v4, v12, v8
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmslt.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -711,11 +711,11 @@ define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vmslt.vv v14, v10, v8
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmslt.vv v8, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmslt.vv v2, v10, v8
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmslt.vv v4, v12, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 4 x i1> @llvm.riscv.vmsgt.nxv4i32(
@@ -763,9 +763,9 @@ define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vmslt.vv v20, v12, v8
+; CHECK-NEXT:    vmslt.vv v4, v12, v8
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmslt.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -867,11 +867,11 @@ define <vscale x 2 x i1> @intrinsic_vmsgt_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vmslt.vv v14, v10, v8
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmslt.vv v8, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmslt.vv v2, v10, v8
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmslt.vv v4, v12, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 2 x i1> @llvm.riscv.vmsgt.nxv2i64(
@@ -919,9 +919,9 @@ define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vmslt.vv v20, v12, v8
+; CHECK-NEXT:    vmslt.vv v4, v12, v8
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmslt.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -1158,11 +1158,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i8.i8(
 define <vscale x 16 x i1> @intrinsic_vmsgt_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsgt.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsgt.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i8.i8(
@@ -1205,11 +1205,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsgt.mask.nxv32i8.i8(
 define <vscale x 32 x i1> @intrinsic_vmsgt_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsgt.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsgt.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsgt.mask.nxv32i8.i8(
@@ -1393,11 +1393,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i16.i16(
 define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsgt.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsgt.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i16.i16(
@@ -1440,11 +1440,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i16.i16(
 define <vscale x 16 x i1> @intrinsic_vmsgt_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsgt.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsgt.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i16.i16(
@@ -1581,11 +1581,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i32.i32(
 define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsgt.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsgt.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i32.i32(
@@ -1628,11 +1628,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i32.i32(
 define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsgt.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsgt.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i32.i32(
@@ -1767,20 +1767,20 @@ define <vscale x 2 x i1> @intrinsic_vmsgt_mask_vx_nxv2i64_i64(<vscale x 2 x i1>
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
 ; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmv1r.v v11, v0
+; RV32-NEXT:    vmv1r.v v2, v0
 ; RV32-NEXT:    vmv1r.v v0, v10
-; RV32-NEXT:    vmslt.vv v11, v12, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v11
+; RV32-NEXT:    vmslt.vv v2, v12, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v2
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsgt_mask_vx_nxv2i64_i64:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    vmv1r.v v11, v0
+; RV64-NEXT:    vmv1r.v v2, v0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
 ; RV64-NEXT:    vmv1r.v v0, v10
-; RV64-NEXT:    vmsgt.vx v11, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v11
+; RV64-NEXT:    vmsgt.vx v2, v8, a0, v0.t
+; RV64-NEXT:    vmv1r.v v0, v2
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsgt.mask.nxv2i64.i64(
@@ -1841,20 +1841,20 @@ define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vx_nxv4i64_i64(<vscale x 4 x i1>
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmv1r.v v13, v0
+; RV32-NEXT:    vmv1r.v v4, v0
 ; RV32-NEXT:    vmv1r.v v0, v12
-; RV32-NEXT:    vmslt.vv v13, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v13
+; RV32-NEXT:    vmslt.vv v4, v16, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v4
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsgt_mask_vx_nxv4i64_i64:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    vmv1r.v v13, v0
+; RV64-NEXT:    vmv1r.v v4, v0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT:    vmv1r.v v0, v12
-; RV64-NEXT:    vmsgt.vx v13, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v13
+; RV64-NEXT:    vmsgt.vx v4, v8, a0, v0.t
+; RV64-NEXT:    vmv1r.v v0, v4
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i64.i64(
@@ -2025,11 +2025,11 @@ entry:
 define <vscale x 16 x i1> @intrinsic_vmsgt_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsgt.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsgt.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i8.i8(
@@ -2060,11 +2060,11 @@ entry:
 define <vscale x 32 x i1> @intrinsic_vmsgt_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsgt.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsgt.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsgt.mask.nxv32i8.i8(
@@ -2200,11 +2200,11 @@ entry:
 define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsgt.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsgt.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i16.i16(
@@ -2235,11 +2235,11 @@ entry:
 define <vscale x 16 x i1> @intrinsic_vmsgt_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsgt.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsgt.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i16.i16(
@@ -2340,11 +2340,11 @@ entry:
 define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsgt.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsgt.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i32.i32(
@@ -2375,11 +2375,11 @@ entry:
 define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsgt.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsgt.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i32.i32(
@@ -2445,11 +2445,11 @@ entry:
 define <vscale x 2 x i1> @intrinsic_vmsgt_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsgt.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsgt.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsgt.mask.nxv2i64.i64(
@@ -2480,11 +2480,11 @@ entry:
 define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsgt.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsgt.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll
index 88a632de067a68..e798d39e699467 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll
@@ -243,11 +243,11 @@ define <vscale x 16 x i1> @intrinsic_vmsgtu_mask_vv_nxv16i8_nxv16i8(<vscale x 16
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmsltu.vv v14, v10, v8
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsltu.vv v8, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsltu.vv v2, v10, v8
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsltu.vv v4, v12, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 16 x i1> @llvm.riscv.vmsgtu.nxv16i8(
@@ -295,9 +295,9 @@ define <vscale x 32 x i1> @intrinsic_vmsgtu_mask_vv_nxv32i8_nxv32i8(<vscale x 32
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmsltu.vv v20, v12, v8
+; CHECK-NEXT:    vmsltu.vv v4, v12, v8
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsltu.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -503,11 +503,11 @@ define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vmsltu.vv v14, v10, v8
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsltu.vv v8, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsltu.vv v2, v10, v8
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsltu.vv v4, v12, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 8 x i1> @llvm.riscv.vmsgtu.nxv8i16(
@@ -555,9 +555,9 @@ define <vscale x 16 x i1> @intrinsic_vmsgtu_mask_vv_nxv16i16_nxv16i16(<vscale x
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmsltu.vv v20, v12, v8
+; CHECK-NEXT:    vmsltu.vv v4, v12, v8
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsltu.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -711,11 +711,11 @@ define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vmsltu.vv v14, v10, v8
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsltu.vv v8, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsltu.vv v2, v10, v8
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsltu.vv v4, v12, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.nxv4i32(
@@ -763,9 +763,9 @@ define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vmsltu.vv v20, v12, v8
+; CHECK-NEXT:    vmsltu.vv v4, v12, v8
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsltu.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -867,11 +867,11 @@ define <vscale x 2 x i1> @intrinsic_vmsgtu_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vmsltu.vv v14, v10, v8
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsltu.vv v8, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsltu.vv v2, v10, v8
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsltu.vv v4, v12, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 2 x i1> @llvm.riscv.vmsgtu.nxv2i64(
@@ -919,9 +919,9 @@ define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vmsltu.vv v20, v12, v8
+; CHECK-NEXT:    vmsltu.vv v4, v12, v8
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsltu.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -1158,11 +1158,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i8.i8(
 define <vscale x 16 x i1> @intrinsic_vmsgtu_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsgtu.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsgtu.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i8.i8(
@@ -1205,11 +1205,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsgtu.mask.nxv32i8.i8(
 define <vscale x 32 x i1> @intrinsic_vmsgtu_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsgtu.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsgtu.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsgtu.mask.nxv32i8.i8(
@@ -1393,11 +1393,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i16.i16(
 define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsgtu.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsgtu.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i16.i16(
@@ -1440,11 +1440,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i16.i16(
 define <vscale x 16 x i1> @intrinsic_vmsgtu_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsgtu.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsgtu.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i16.i16(
@@ -1581,11 +1581,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i32.i32(
 define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsgtu.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsgtu.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i32.i32(
@@ -1628,11 +1628,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i32.i32(
 define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsgtu.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsgtu.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i32.i32(
@@ -1767,20 +1767,20 @@ define <vscale x 2 x i1> @intrinsic_vmsgtu_mask_vx_nxv2i64_i64(<vscale x 2 x i1>
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
 ; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmv1r.v v11, v0
+; RV32-NEXT:    vmv1r.v v2, v0
 ; RV32-NEXT:    vmv1r.v v0, v10
-; RV32-NEXT:    vmsltu.vv v11, v12, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v11
+; RV32-NEXT:    vmsltu.vv v2, v12, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v2
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i64_i64:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    vmv1r.v v11, v0
+; RV64-NEXT:    vmv1r.v v2, v0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
 ; RV64-NEXT:    vmv1r.v v0, v10
-; RV64-NEXT:    vmsgtu.vx v11, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v11
+; RV64-NEXT:    vmsgtu.vx v2, v8, a0, v0.t
+; RV64-NEXT:    vmv1r.v v0, v2
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsgtu.mask.nxv2i64.i64(
@@ -1841,20 +1841,20 @@ define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vx_nxv4i64_i64(<vscale x 4 x i1>
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmv1r.v v13, v0
+; RV32-NEXT:    vmv1r.v v4, v0
 ; RV32-NEXT:    vmv1r.v v0, v12
-; RV32-NEXT:    vmsltu.vv v13, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v13
+; RV32-NEXT:    vmsltu.vv v4, v16, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v4
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i64_i64:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    vmv1r.v v13, v0
+; RV64-NEXT:    vmv1r.v v4, v0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT:    vmv1r.v v0, v12
-; RV64-NEXT:    vmsgtu.vx v13, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v13
+; RV64-NEXT:    vmsgtu.vx v4, v8, a0, v0.t
+; RV64-NEXT:    vmv1r.v v0, v4
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i64.i64(
@@ -2025,11 +2025,11 @@ entry:
 define <vscale x 16 x i1> @intrinsic_vmsgtu_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsgtu.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsgtu.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i8.i8(
@@ -2060,11 +2060,11 @@ entry:
 define <vscale x 32 x i1> @intrinsic_vmsgtu_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsgtu.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsgtu.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsgtu.mask.nxv32i8.i8(
@@ -2200,11 +2200,11 @@ entry:
 define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsgtu.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsgtu.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i16.i16(
@@ -2235,11 +2235,11 @@ entry:
 define <vscale x 16 x i1> @intrinsic_vmsgtu_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsgtu.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsgtu.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i16.i16(
@@ -2340,11 +2340,11 @@ entry:
 define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsgtu.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsgtu.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i32.i32(
@@ -2375,11 +2375,11 @@ entry:
 define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsgtu.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsgtu.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i32.i32(
@@ -2445,11 +2445,11 @@ entry:
 define <vscale x 2 x i1> @intrinsic_vmsgtu_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsgtu.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsgtu.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsgtu.mask.nxv2i64.i64(
@@ -2480,11 +2480,11 @@ entry:
 define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsgtu.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsgtu.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll
index 2248ba03adfe79..a7a754fe4a20dd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll
@@ -243,11 +243,11 @@ define <vscale x 16 x i1> @intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8(<vscale x 16
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmsle.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsle.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsle.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsle.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 16 x i1> @llvm.riscv.vmsle.nxv16i8(
@@ -295,9 +295,9 @@ define <vscale x 32 x i1> @intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8(<vscale x 32
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmsle.vv v20, v8, v12
+; CHECK-NEXT:    vmsle.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsle.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -503,11 +503,11 @@ define <vscale x 8 x i1> @intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vmsle.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsle.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsle.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsle.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 8 x i1> @llvm.riscv.vmsle.nxv8i16(
@@ -555,9 +555,9 @@ define <vscale x 16 x i1> @intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16(<vscale x 1
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmsle.vv v20, v8, v12
+; CHECK-NEXT:    vmsle.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsle.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -711,11 +711,11 @@ define <vscale x 4 x i1> @intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vmsle.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsle.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsle.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsle.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 4 x i1> @llvm.riscv.vmsle.nxv4i32(
@@ -763,9 +763,9 @@ define <vscale x 8 x i1> @intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vmsle.vv v20, v8, v12
+; CHECK-NEXT:    vmsle.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsle.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -867,11 +867,11 @@ define <vscale x 2 x i1> @intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vmsle.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsle.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsle.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsle.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 2 x i1> @llvm.riscv.vmsle.nxv2i64(
@@ -919,9 +919,9 @@ define <vscale x 4 x i1> @intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vmsle.vv v20, v8, v12
+; CHECK-NEXT:    vmsle.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsle.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -1158,11 +1158,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i8.i8(
 define <vscale x 16 x i1> @intrinsic_vmsle_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsle.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsle.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i8.i8(
@@ -1205,11 +1205,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsle.mask.nxv32i8.i8(
 define <vscale x 32 x i1> @intrinsic_vmsle_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsle.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsle.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsle.mask.nxv32i8.i8(
@@ -1393,11 +1393,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i16.i16(
 define <vscale x 8 x i1> @intrinsic_vmsle_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsle.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsle.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i16.i16(
@@ -1440,11 +1440,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i16.i16(
 define <vscale x 16 x i1> @intrinsic_vmsle_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsle.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsle.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i16.i16(
@@ -1581,11 +1581,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i32.i32(
 define <vscale x 4 x i1> @intrinsic_vmsle_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsle.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsle.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i32.i32(
@@ -1628,11 +1628,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i32.i32(
 define <vscale x 8 x i1> @intrinsic_vmsle_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsle.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsle.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i32.i32(
@@ -1767,20 +1767,20 @@ define <vscale x 2 x i1> @intrinsic_vmsle_mask_vx_nxv2i64_i64(<vscale x 2 x i1>
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
 ; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmv1r.v v11, v0
+; RV32-NEXT:    vmv1r.v v2, v0
 ; RV32-NEXT:    vmv1r.v v0, v10
-; RV32-NEXT:    vmsle.vv v11, v8, v12, v0.t
-; RV32-NEXT:    vmv1r.v v0, v11
+; RV32-NEXT:    vmsle.vv v2, v8, v12, v0.t
+; RV32-NEXT:    vmv1r.v v0, v2
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsle_mask_vx_nxv2i64_i64:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    vmv1r.v v11, v0
+; RV64-NEXT:    vmv1r.v v2, v0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
 ; RV64-NEXT:    vmv1r.v v0, v10
-; RV64-NEXT:    vmsle.vx v11, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v11
+; RV64-NEXT:    vmsle.vx v2, v8, a0, v0.t
+; RV64-NEXT:    vmv1r.v v0, v2
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsle.mask.nxv2i64.i64(
@@ -1841,20 +1841,20 @@ define <vscale x 4 x i1> @intrinsic_vmsle_mask_vx_nxv4i64_i64(<vscale x 4 x i1>
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmv1r.v v13, v0
+; RV32-NEXT:    vmv1r.v v4, v0
 ; RV32-NEXT:    vmv1r.v v0, v12
-; RV32-NEXT:    vmsle.vv v13, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v13
+; RV32-NEXT:    vmsle.vv v4, v8, v16, v0.t
+; RV32-NEXT:    vmv1r.v v0, v4
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsle_mask_vx_nxv4i64_i64:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    vmv1r.v v13, v0
+; RV64-NEXT:    vmv1r.v v4, v0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT:    vmv1r.v v0, v12
-; RV64-NEXT:    vmsle.vx v13, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v13
+; RV64-NEXT:    vmsle.vx v4, v8, a0, v0.t
+; RV64-NEXT:    vmv1r.v v0, v4
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i64.i64(
@@ -2025,11 +2025,11 @@ entry:
 define <vscale x 16 x i1> @intrinsic_vmsle_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsle.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsle.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i8.i8(
@@ -2060,11 +2060,11 @@ entry:
 define <vscale x 32 x i1> @intrinsic_vmsle_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsle.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsle.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsle.mask.nxv32i8.i8(
@@ -2200,11 +2200,11 @@ entry:
 define <vscale x 8 x i1> @intrinsic_vmsle_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsle.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsle.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i16.i16(
@@ -2235,11 +2235,11 @@ entry:
 define <vscale x 16 x i1> @intrinsic_vmsle_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsle.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsle.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i16.i16(
@@ -2340,11 +2340,11 @@ entry:
 define <vscale x 4 x i1> @intrinsic_vmsle_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsle.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsle.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i32.i32(
@@ -2375,11 +2375,11 @@ entry:
 define <vscale x 8 x i1> @intrinsic_vmsle_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsle.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsle.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i32.i32(
@@ -2445,11 +2445,11 @@ entry:
 define <vscale x 2 x i1> @intrinsic_vmsle_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsle.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsle.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsle.mask.nxv2i64.i64(
@@ -2480,11 +2480,11 @@ entry:
 define <vscale x 4 x i1> @intrinsic_vmsle_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsle.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsle.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll
index 57bae83b25e0e5..c382d47ea83b04 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll
@@ -243,11 +243,11 @@ define <vscale x 16 x i1> @intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8(<vscale x 16
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmsleu.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsleu.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsleu.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsleu.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 16 x i1> @llvm.riscv.vmsleu.nxv16i8(
@@ -295,9 +295,9 @@ define <vscale x 32 x i1> @intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8(<vscale x 32
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmsleu.vv v20, v8, v12
+; CHECK-NEXT:    vmsleu.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsleu.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -503,11 +503,11 @@ define <vscale x 8 x i1> @intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vmsleu.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsleu.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsleu.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsleu.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 8 x i1> @llvm.riscv.vmsleu.nxv8i16(
@@ -555,9 +555,9 @@ define <vscale x 16 x i1> @intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16(<vscale x
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmsleu.vv v20, v8, v12
+; CHECK-NEXT:    vmsleu.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsleu.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -711,11 +711,11 @@ define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vmsleu.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsleu.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsleu.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsleu.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 4 x i1> @llvm.riscv.vmsleu.nxv4i32(
@@ -763,9 +763,9 @@ define <vscale x 8 x i1> @intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vmsleu.vv v20, v8, v12
+; CHECK-NEXT:    vmsleu.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsleu.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -867,11 +867,11 @@ define <vscale x 2 x i1> @intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vmsleu.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsleu.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsleu.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsleu.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 2 x i1> @llvm.riscv.vmsleu.nxv2i64(
@@ -919,9 +919,9 @@ define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vmsleu.vv v20, v8, v12
+; CHECK-NEXT:    vmsleu.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsleu.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -1158,11 +1158,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i8.i8(
 define <vscale x 16 x i1> @intrinsic_vmsleu_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsleu.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsleu.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i8.i8(
@@ -1205,11 +1205,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsleu.mask.nxv32i8.i8(
 define <vscale x 32 x i1> @intrinsic_vmsleu_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsleu.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsleu.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsleu.mask.nxv32i8.i8(
@@ -1393,11 +1393,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i16.i16(
 define <vscale x 8 x i1> @intrinsic_vmsleu_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsleu.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsleu.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i16.i16(
@@ -1440,11 +1440,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i16.i16(
 define <vscale x 16 x i1> @intrinsic_vmsleu_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsleu.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsleu.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i16.i16(
@@ -1581,11 +1581,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i32.i32(
 define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsleu.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsleu.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i32.i32(
@@ -1628,11 +1628,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i32.i32(
 define <vscale x 8 x i1> @intrinsic_vmsleu_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsleu.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsleu.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i32.i32(
@@ -1767,20 +1767,20 @@ define <vscale x 2 x i1> @intrinsic_vmsleu_mask_vx_nxv2i64_i64(<vscale x 2 x i1>
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
 ; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmv1r.v v11, v0
+; RV32-NEXT:    vmv1r.v v2, v0
 ; RV32-NEXT:    vmv1r.v v0, v10
-; RV32-NEXT:    vmsleu.vv v11, v8, v12, v0.t
-; RV32-NEXT:    vmv1r.v v0, v11
+; RV32-NEXT:    vmsleu.vv v2, v8, v12, v0.t
+; RV32-NEXT:    vmv1r.v v0, v2
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsleu_mask_vx_nxv2i64_i64:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    vmv1r.v v11, v0
+; RV64-NEXT:    vmv1r.v v2, v0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
 ; RV64-NEXT:    vmv1r.v v0, v10
-; RV64-NEXT:    vmsleu.vx v11, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v11
+; RV64-NEXT:    vmsleu.vx v2, v8, a0, v0.t
+; RV64-NEXT:    vmv1r.v v0, v2
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsleu.mask.nxv2i64.i64(
@@ -1841,20 +1841,20 @@ define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vx_nxv4i64_i64(<vscale x 4 x i1>
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmv1r.v v13, v0
+; RV32-NEXT:    vmv1r.v v4, v0
 ; RV32-NEXT:    vmv1r.v v0, v12
-; RV32-NEXT:    vmsleu.vv v13, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v13
+; RV32-NEXT:    vmsleu.vv v4, v8, v16, v0.t
+; RV32-NEXT:    vmv1r.v v0, v4
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsleu_mask_vx_nxv4i64_i64:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    vmv1r.v v13, v0
+; RV64-NEXT:    vmv1r.v v4, v0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT:    vmv1r.v v0, v12
-; RV64-NEXT:    vmsleu.vx v13, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v13
+; RV64-NEXT:    vmsleu.vx v4, v8, a0, v0.t
+; RV64-NEXT:    vmv1r.v v0, v4
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i64.i64(
@@ -2025,11 +2025,11 @@ entry:
 define <vscale x 16 x i1> @intrinsic_vmsleu_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsleu.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsleu.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i8.i8(
@@ -2060,11 +2060,11 @@ entry:
 define <vscale x 32 x i1> @intrinsic_vmsleu_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsleu.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsleu.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsleu.mask.nxv32i8.i8(
@@ -2200,11 +2200,11 @@ entry:
 define <vscale x 8 x i1> @intrinsic_vmsleu_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsleu.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsleu.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i16.i16(
@@ -2235,11 +2235,11 @@ entry:
 define <vscale x 16 x i1> @intrinsic_vmsleu_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsleu.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsleu.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i16.i16(
@@ -2340,11 +2340,11 @@ entry:
 define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsleu.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsleu.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i32.i32(
@@ -2375,11 +2375,11 @@ entry:
 define <vscale x 8 x i1> @intrinsic_vmsleu_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsleu.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsleu.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i32.i32(
@@ -2445,11 +2445,11 @@ entry:
 define <vscale x 2 x i1> @intrinsic_vmsleu_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsleu.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsleu.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsleu.mask.nxv2i64.i64(
@@ -2480,11 +2480,11 @@ entry:
 define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsleu.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsleu.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll
index 6783f7feb624c5..9ba5a4045cfa48 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll
@@ -243,11 +243,11 @@ define <vscale x 16 x i1> @intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8(<vscale x 16
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmslt.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmslt.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmslt.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmslt.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 16 x i1> @llvm.riscv.vmslt.nxv16i8(
@@ -295,9 +295,9 @@ define <vscale x 32 x i1> @intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8(<vscale x 32
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmslt.vv v20, v8, v12
+; CHECK-NEXT:    vmslt.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmslt.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -503,11 +503,11 @@ define <vscale x 8 x i1> @intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vmslt.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmslt.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmslt.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmslt.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 8 x i1> @llvm.riscv.vmslt.nxv8i16(
@@ -555,9 +555,9 @@ define <vscale x 16 x i1> @intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16(<vscale x 1
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmslt.vv v20, v8, v12
+; CHECK-NEXT:    vmslt.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmslt.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -711,11 +711,11 @@ define <vscale x 4 x i1> @intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vmslt.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmslt.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmslt.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmslt.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 4 x i1> @llvm.riscv.vmslt.nxv4i32(
@@ -763,9 +763,9 @@ define <vscale x 8 x i1> @intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vmslt.vv v20, v8, v12
+; CHECK-NEXT:    vmslt.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmslt.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -867,11 +867,11 @@ define <vscale x 2 x i1> @intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vmslt.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmslt.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmslt.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmslt.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 2 x i1> @llvm.riscv.vmslt.nxv2i64(
@@ -919,9 +919,9 @@ define <vscale x 4 x i1> @intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vmslt.vv v20, v8, v12
+; CHECK-NEXT:    vmslt.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmslt.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -1158,11 +1158,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i8.i8(
 define <vscale x 16 x i1> @intrinsic_vmslt_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmslt.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmslt.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i8.i8(
@@ -1205,11 +1205,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmslt.mask.nxv32i8.i8(
 define <vscale x 32 x i1> @intrinsic_vmslt_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmslt.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmslt.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmslt.mask.nxv32i8.i8(
@@ -1393,11 +1393,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i16.i16(
 define <vscale x 8 x i1> @intrinsic_vmslt_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmslt.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmslt.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i16.i16(
@@ -1440,11 +1440,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i16.i16(
 define <vscale x 16 x i1> @intrinsic_vmslt_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmslt.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmslt.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i16.i16(
@@ -1581,11 +1581,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i32.i32(
 define <vscale x 4 x i1> @intrinsic_vmslt_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmslt.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmslt.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i32.i32(
@@ -1628,11 +1628,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i32.i32(
 define <vscale x 8 x i1> @intrinsic_vmslt_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmslt.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmslt.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i32.i32(
@@ -1767,20 +1767,20 @@ define <vscale x 2 x i1> @intrinsic_vmslt_mask_vx_nxv2i64_i64(<vscale x 2 x i1>
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
 ; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmv1r.v v11, v0
+; RV32-NEXT:    vmv1r.v v2, v0
 ; RV32-NEXT:    vmv1r.v v0, v10
-; RV32-NEXT:    vmslt.vv v11, v8, v12, v0.t
-; RV32-NEXT:    vmv1r.v v0, v11
+; RV32-NEXT:    vmslt.vv v2, v8, v12, v0.t
+; RV32-NEXT:    vmv1r.v v0, v2
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmslt_mask_vx_nxv2i64_i64:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    vmv1r.v v11, v0
+; RV64-NEXT:    vmv1r.v v2, v0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
 ; RV64-NEXT:    vmv1r.v v0, v10
-; RV64-NEXT:    vmslt.vx v11, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v11
+; RV64-NEXT:    vmslt.vx v2, v8, a0, v0.t
+; RV64-NEXT:    vmv1r.v v0, v2
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmslt.mask.nxv2i64.i64(
@@ -1841,20 +1841,20 @@ define <vscale x 4 x i1> @intrinsic_vmslt_mask_vx_nxv4i64_i64(<vscale x 4 x i1>
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmv1r.v v13, v0
+; RV32-NEXT:    vmv1r.v v4, v0
 ; RV32-NEXT:    vmv1r.v v0, v12
-; RV32-NEXT:    vmslt.vv v13, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v13
+; RV32-NEXT:    vmslt.vv v4, v8, v16, v0.t
+; RV32-NEXT:    vmv1r.v v0, v4
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmslt_mask_vx_nxv4i64_i64:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    vmv1r.v v13, v0
+; RV64-NEXT:    vmv1r.v v4, v0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT:    vmv1r.v v0, v12
-; RV64-NEXT:    vmslt.vx v13, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v13
+; RV64-NEXT:    vmslt.vx v4, v8, a0, v0.t
+; RV64-NEXT:    vmv1r.v v0, v4
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i64.i64(
@@ -2025,11 +2025,11 @@ entry:
 define <vscale x 16 x i1> @intrinsic_vmslt_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsle.vi v11, v8, -7, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsle.vi v2, v8, -7, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i8.i8(
@@ -2060,11 +2060,11 @@ entry:
 define <vscale x 32 x i1> @intrinsic_vmslt_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsle.vi v13, v8, -5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsle.vi v4, v8, -5, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmslt.mask.nxv32i8.i8(
@@ -2200,11 +2200,11 @@ entry:
 define <vscale x 8 x i1> @intrinsic_vmslt_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsle.vi v11, v8, 2, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsle.vi v2, v8, 2, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i16.i16(
@@ -2235,11 +2235,11 @@ entry:
 define <vscale x 16 x i1> @intrinsic_vmslt_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsle.vi v13, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsle.vi v4, v8, 4, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i16.i16(
@@ -2340,11 +2340,11 @@ entry:
 define <vscale x 4 x i1> @intrinsic_vmslt_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsle.vi v11, v8, 10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsle.vi v2, v8, 10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i32.i32(
@@ -2375,11 +2375,11 @@ entry:
 define <vscale x 8 x i1> @intrinsic_vmslt_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsle.vi v13, v8, 12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsle.vi v4, v8, 12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i32.i32(
@@ -2445,11 +2445,11 @@ entry:
 define <vscale x 2 x i1> @intrinsic_vmslt_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsle.vi v11, v8, 8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsle.vi v2, v8, 8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmslt.mask.nxv2i64.i64(
@@ -2480,11 +2480,11 @@ entry:
 define <vscale x 4 x i1> @intrinsic_vmslt_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsle.vi v13, v8, 8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsle.vi v4, v8, 8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll
index b082b735a02072..2a92357b386da0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll
@@ -243,11 +243,11 @@ define <vscale x 16 x i1> @intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8(<vscale x 16
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmsltu.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsltu.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsltu.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsltu.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 16 x i1> @llvm.riscv.vmsltu.nxv16i8(
@@ -295,9 +295,9 @@ define <vscale x 32 x i1> @intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8(<vscale x 32
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmsltu.vv v20, v8, v12
+; CHECK-NEXT:    vmsltu.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsltu.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -503,11 +503,11 @@ define <vscale x 8 x i1> @intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vmsltu.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsltu.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsltu.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsltu.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 8 x i1> @llvm.riscv.vmsltu.nxv8i16(
@@ -555,9 +555,9 @@ define <vscale x 16 x i1> @intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16(<vscale x
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmsltu.vv v20, v8, v12
+; CHECK-NEXT:    vmsltu.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsltu.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -711,11 +711,11 @@ define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vmsltu.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsltu.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsltu.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsltu.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 4 x i1> @llvm.riscv.vmsltu.nxv4i32(
@@ -763,9 +763,9 @@ define <vscale x 8 x i1> @intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vmsltu.vv v20, v8, v12
+; CHECK-NEXT:    vmsltu.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsltu.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -867,11 +867,11 @@ define <vscale x 2 x i1> @intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vmsltu.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsltu.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsltu.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsltu.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 2 x i1> @llvm.riscv.vmsltu.nxv2i64(
@@ -919,9 +919,9 @@ define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vmsltu.vv v20, v8, v12
+; CHECK-NEXT:    vmsltu.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsltu.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -1158,11 +1158,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i8.i8(
 define <vscale x 16 x i1> @intrinsic_vmsltu_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsltu.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsltu.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i8.i8(
@@ -1205,11 +1205,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsltu.mask.nxv32i8.i8(
 define <vscale x 32 x i1> @intrinsic_vmsltu_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsltu.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsltu.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsltu.mask.nxv32i8.i8(
@@ -1393,11 +1393,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i16.i16(
 define <vscale x 8 x i1> @intrinsic_vmsltu_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsltu.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsltu.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i16.i16(
@@ -1440,11 +1440,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i16.i16(
 define <vscale x 16 x i1> @intrinsic_vmsltu_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsltu.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsltu.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i16.i16(
@@ -1581,11 +1581,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i32.i32(
 define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsltu.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsltu.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i32.i32(
@@ -1628,11 +1628,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i32.i32(
 define <vscale x 8 x i1> @intrinsic_vmsltu_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsltu.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsltu.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i32.i32(
@@ -1767,20 +1767,20 @@ define <vscale x 2 x i1> @intrinsic_vmsltu_mask_vx_nxv2i64_i64(<vscale x 2 x i1>
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
 ; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmv1r.v v11, v0
+; RV32-NEXT:    vmv1r.v v2, v0
 ; RV32-NEXT:    vmv1r.v v0, v10
-; RV32-NEXT:    vmsltu.vv v11, v8, v12, v0.t
-; RV32-NEXT:    vmv1r.v v0, v11
+; RV32-NEXT:    vmsltu.vv v2, v8, v12, v0.t
+; RV32-NEXT:    vmv1r.v v0, v2
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsltu_mask_vx_nxv2i64_i64:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    vmv1r.v v11, v0
+; RV64-NEXT:    vmv1r.v v2, v0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
 ; RV64-NEXT:    vmv1r.v v0, v10
-; RV64-NEXT:    vmsltu.vx v11, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v11
+; RV64-NEXT:    vmsltu.vx v2, v8, a0, v0.t
+; RV64-NEXT:    vmv1r.v v0, v2
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsltu.mask.nxv2i64.i64(
@@ -1841,20 +1841,20 @@ define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vx_nxv4i64_i64(<vscale x 4 x i1>
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmv1r.v v13, v0
+; RV32-NEXT:    vmv1r.v v4, v0
 ; RV32-NEXT:    vmv1r.v v0, v12
-; RV32-NEXT:    vmsltu.vv v13, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v13
+; RV32-NEXT:    vmsltu.vv v4, v8, v16, v0.t
+; RV32-NEXT:    vmv1r.v v0, v4
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsltu_mask_vx_nxv4i64_i64:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    vmv1r.v v13, v0
+; RV64-NEXT:    vmv1r.v v4, v0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT:    vmv1r.v v0, v12
-; RV64-NEXT:    vmsltu.vx v13, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v13
+; RV64-NEXT:    vmsltu.vx v4, v8, a0, v0.t
+; RV64-NEXT:    vmv1r.v v0, v4
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i64.i64(
@@ -2025,11 +2025,11 @@ entry:
 define <vscale x 16 x i1> @intrinsic_vmsltu_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsleu.vi v11, v8, -7, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsleu.vi v2, v8, -7, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i8.i8(
@@ -2060,11 +2060,11 @@ entry:
 define <vscale x 32 x i1> @intrinsic_vmsltu_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsleu.vi v13, v8, -5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsleu.vi v4, v8, -5, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsltu.mask.nxv32i8.i8(
@@ -2200,11 +2200,11 @@ entry:
 define <vscale x 8 x i1> @intrinsic_vmsltu_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsleu.vi v11, v8, 2, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsleu.vi v2, v8, 2, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i16.i16(
@@ -2235,11 +2235,11 @@ entry:
 define <vscale x 16 x i1> @intrinsic_vmsltu_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsleu.vi v13, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsleu.vi v4, v8, 4, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i16.i16(
@@ -2340,11 +2340,11 @@ entry:
 define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsleu.vi v11, v8, 10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsleu.vi v2, v8, 10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i32.i32(
@@ -2375,11 +2375,11 @@ entry:
 define <vscale x 8 x i1> @intrinsic_vmsltu_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsleu.vi v13, v8, 12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsleu.vi v4, v8, 12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i32.i32(
@@ -2445,11 +2445,11 @@ entry:
 define <vscale x 2 x i1> @intrinsic_vmsltu_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsleu.vi v11, v8, -16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsleu.vi v2, v8, -16, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsltu.mask.nxv2i64.i64(
@@ -2480,11 +2480,11 @@ entry:
 define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsleu.vi v13, v8, -14, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsleu.vi v4, v8, -14, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll
index bb4575e5d72cbe..27f2dfea36b4ce 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll
@@ -243,11 +243,11 @@ define <vscale x 16 x i1> @intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8(<vscale x 16
 ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmsne.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsne.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsne.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsne.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 16 x i1> @llvm.riscv.vmsne.nxv16i8(
@@ -295,9 +295,9 @@ define <vscale x 32 x i1> @intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8(<vscale x 32
 ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmsne.vv v20, v8, v12
+; CHECK-NEXT:    vmsne.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsne.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -503,11 +503,11 @@ define <vscale x 8 x i1> @intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vmsne.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsne.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsne.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsne.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 8 x i1> @llvm.riscv.vmsne.nxv8i16(
@@ -555,9 +555,9 @@ define <vscale x 16 x i1> @intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16(<vscale x 1
 ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmsne.vv v20, v8, v12
+; CHECK-NEXT:    vmsne.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsne.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -711,11 +711,11 @@ define <vscale x 4 x i1> @intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vmsne.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsne.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsne.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsne.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 4 x i1> @llvm.riscv.vmsne.nxv4i32(
@@ -763,9 +763,9 @@ define <vscale x 8 x i1> @intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vmsne.vv v20, v8, v12
+; CHECK-NEXT:    vmsne.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsne.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -867,11 +867,11 @@ define <vscale x 2 x i1> @intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vmsne.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmsne.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsne.vv v2, v8, v10
+; CHECK-NEXT:    vmv1r.v v4, v0
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    vmsne.vv v4, v10, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 2 x i1> @llvm.riscv.vmsne.nxv2i64(
@@ -919,9 +919,9 @@ define <vscale x 4 x i1> @intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vmsne.vv v20, v8, v12
+; CHECK-NEXT:    vmsne.vv v4, v8, v12
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v20
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vmsne.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
@@ -1158,11 +1158,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i8.i8(
 define <vscale x 16 x i1> @intrinsic_vmsne_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsne.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsne.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i8.i8(
@@ -1205,11 +1205,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsne.mask.nxv32i8.i8(
 define <vscale x 32 x i1> @intrinsic_vmsne_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsne.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsne.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsne.mask.nxv32i8.i8(
@@ -1393,11 +1393,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i16.i16(
 define <vscale x 8 x i1> @intrinsic_vmsne_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsne.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsne.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i16.i16(
@@ -1440,11 +1440,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i16.i16(
 define <vscale x 16 x i1> @intrinsic_vmsne_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsne.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsne.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i16.i16(
@@ -1581,11 +1581,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i32.i32(
 define <vscale x 4 x i1> @intrinsic_vmsne_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsne.vx v11, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsne.vx v2, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i32.i32(
@@ -1628,11 +1628,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i32.i32(
 define <vscale x 8 x i1> @intrinsic_vmsne_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsne.vx v13, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsne.vx v4, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i32.i32(
@@ -1767,20 +1767,20 @@ define <vscale x 2 x i1> @intrinsic_vmsne_mask_vx_nxv2i64_i64(<vscale x 2 x i1>
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
 ; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmv1r.v v11, v0
+; RV32-NEXT:    vmv1r.v v2, v0
 ; RV32-NEXT:    vmv1r.v v0, v10
-; RV32-NEXT:    vmsne.vv v11, v8, v12, v0.t
-; RV32-NEXT:    vmv1r.v v0, v11
+; RV32-NEXT:    vmsne.vv v2, v8, v12, v0.t
+; RV32-NEXT:    vmv1r.v v0, v2
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsne_mask_vx_nxv2i64_i64:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    vmv1r.v v11, v0
+; RV64-NEXT:    vmv1r.v v2, v0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
 ; RV64-NEXT:    vmv1r.v v0, v10
-; RV64-NEXT:    vmsne.vx v11, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v11
+; RV64-NEXT:    vmsne.vx v2, v8, a0, v0.t
+; RV64-NEXT:    vmv1r.v v0, v2
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsne.mask.nxv2i64.i64(
@@ -1841,20 +1841,20 @@ define <vscale x 4 x i1> @intrinsic_vmsne_mask_vx_nxv4i64_i64(<vscale x 4 x i1>
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmv1r.v v13, v0
+; RV32-NEXT:    vmv1r.v v4, v0
 ; RV32-NEXT:    vmv1r.v v0, v12
-; RV32-NEXT:    vmsne.vv v13, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v13
+; RV32-NEXT:    vmsne.vv v4, v8, v16, v0.t
+; RV32-NEXT:    vmv1r.v v0, v4
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsne_mask_vx_nxv4i64_i64:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    vmv1r.v v13, v0
+; RV64-NEXT:    vmv1r.v v4, v0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT:    vmv1r.v v0, v12
-; RV64-NEXT:    vmsne.vx v13, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v13
+; RV64-NEXT:    vmsne.vx v4, v8, a0, v0.t
+; RV64-NEXT:    vmv1r.v v0, v4
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i64.i64(
@@ -2025,11 +2025,11 @@ entry:
 define <vscale x 16 x i1> @intrinsic_vmsne_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsne.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsne.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i8.i8(
@@ -2060,11 +2060,11 @@ entry:
 define <vscale x 32 x i1> @intrinsic_vmsne_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsne.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsne.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsne.mask.nxv32i8.i8(
@@ -2200,11 +2200,11 @@ entry:
 define <vscale x 8 x i1> @intrinsic_vmsne_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsne.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsne.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i16.i16(
@@ -2235,11 +2235,11 @@ entry:
 define <vscale x 16 x i1> @intrinsic_vmsne_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsne.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsne.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i16.i16(
@@ -2340,11 +2340,11 @@ entry:
 define <vscale x 4 x i1> @intrinsic_vmsne_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsne.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsne.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i32.i32(
@@ -2375,11 +2375,11 @@ entry:
 define <vscale x 8 x i1> @intrinsic_vmsne_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsne.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsne.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i32.i32(
@@ -2445,11 +2445,11 @@ entry:
 define <vscale x 2 x i1> @intrinsic_vmsne_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v11, v0
+; CHECK-NEXT:    vmv1r.v v2, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmsne.vi v11, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmsne.vi v2, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsne.mask.nxv2i64.i64(
@@ -2480,11 +2480,11 @@ entry:
 define <vscale x 4 x i1> @intrinsic_vmsne_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v13, v0
+; CHECK-NEXT:    vmv1r.v v4, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmsne.vi v13, v8, 9, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmsne.vi v4, v8, 9, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll
index ceb6a164e20df6..6522f0e9efe65e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll
@@ -162,8 +162,7 @@ define <vscale x 16 x i1> @test_vp_reverse_nxv16i1_masked(<vscale x 16 x i1> %sr
 ; CHECK-NEXT:    vrsub.vx v12, v12, a0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
 ; CHECK-NEXT:    vrgatherei16.vv v16, v10, v12, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v16, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsne.vi v0, v16, 0, v0.t
 ; CHECK-NEXT:    ret
   %dst = call <vscale x 16 x i1> @llvm.experimental.vp.reverse.nxv16i1(<vscale x 16 x i1> %src, <vscale x 16 x i1> %mask, i32 %evl)
   ret <vscale x 16 x i1> %dst
@@ -200,8 +199,7 @@ define <vscale x 32 x i1> @test_vp_reverse_nxv32i1_masked(<vscale x 32 x i1> %sr
 ; CHECK-NEXT:    vrsub.vx v16, v16, a0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vrgatherei16.vv v24, v12, v16, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v24, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsne.vi v0, v24, 0, v0.t
 ; CHECK-NEXT:    ret
   %dst = call <vscale x 32 x i1> @llvm.experimental.vp.reverse.nxv32i1(<vscale x 32 x i1> %src, <vscale x 32 x i1> %mask, i32 %evl)
   ret <vscale x 32 x i1> %dst
@@ -245,8 +243,7 @@ define <vscale x 64 x i1> @test_vp_reverse_nxv64i1_masked(<vscale x 64 x i1> %sr
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    vslidedown.vx v16, v16, a1, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v16, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsne.vi v0, v16, 0, v0.t
 ; CHECK-NEXT:    ret
   %dst = call <vscale x 64 x i1> @llvm.experimental.vp.reverse.nxv64i1(<vscale x 64 x i1> %src, <vscale x 64 x i1> %mask, i32 %evl)
   ret <vscale x 64 x i1> %dst
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll
index 668cff23429366..815cb1f57631c3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll
@@ -369,8 +369,7 @@ define <vscale x 16 x i1> @test_vp_splice_nxv16i1_masked(<vscale x 16 x i1> %va,
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT:    vslideup.vx v10, v12, a0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vmsne.vi v8, v10, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsne.vi v0, v10, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x i1> @llvm.experimental.vp.splice.nxv16i1(<vscale x 16 x i1> %va, <vscale x 16 x i1> %vb, i32 5, <vscale x 16 x i1> %mask, i32 %evla, i32 %evlb)
   ret <vscale x 16 x i1> %v
@@ -443,8 +442,7 @@ define <vscale x 32 x i1> @test_vp_splice_nxv32i1_masked(<vscale x 32 x i1> %va,
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT:    vslideup.vx v16, v12, a0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vmsne.vi v8, v16, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsne.vi v0, v16, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 32 x i1> @llvm.experimental.vp.splice.nxv32i1(<vscale x 32 x i1> %va, <vscale x 32 x i1> %vb, i32 5, <vscale x 32 x i1> %mask, i32 %evla, i32 %evlb)
   ret <vscale x 32 x i1> %v
@@ -517,8 +515,7 @@ define <vscale x 64 x i1> @test_vp_splice_nxv64i1_masked(<vscale x 64 x i1> %va,
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, mu
 ; CHECK-NEXT:    vslideup.vx v24, v16, a0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m8, ta, ma
-; CHECK-NEXT:    vmsne.vi v8, v24, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsne.vi v0, v24, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 64 x i1> @llvm.experimental.vp.splice.nxv64i1(<vscale x 64 x i1> %va, <vscale x 64 x i1> %vb, i32 5, <vscale x 64 x i1> %mask, i32 %evla, i32 %evlb)
   ret <vscale x 64 x i1> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
index 4457c1002acc7b..d4b4cb503c76e7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
@@ -448,37 +448,29 @@ define <vscale x 16 x double> @vselect_combine_regression(<vscale x 16 x i64> %v
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT:    vmv8r.v v0, v8
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    add a1, a0, a1
 ; CHECK-NEXT:    vl8re64.v v8, (a1)
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    addi a1, sp, 16
 ; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT:    vl8re64.v v8, (a0)
+; CHECK-NEXT:    vl8re64.v v24, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmseq.vi v24, v16, 0
+; CHECK-NEXT:    vmseq.vi v16, v16, 0
+; CHECK-NEXT:    vmseq.vi v0, v0, 0
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vmerge.vvm v24, v8, v24, v0
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmseq.vi v0, v16, 0
-; CHECK-NEXT:    vmv.v.i v16, 0
-; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmerge.vvm v16, v8, v16, v0
+; CHECK-NEXT:    vmv.v.v v8, v24
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll
index ad8097631acd33..cbb7cdec8d6ed9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll
@@ -56,9 +56,8 @@ define <vscale x 2 x i1> @vtrunc_nxv2i1_nxv2i64(<vscale x 2 x i64> %a, <vscale x
 ; CHECK-LABEL: vtrunc_nxv2i1_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vand.vi v10, v8, 1, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v10, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 %vl)
   ret <vscale x 2 x i1> %v


