[llvm] a4729f7 - [RISCV] Lower RVV vector SELECTs to VSELECTs

Fraser Cormack via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 24 02:21:40 PDT 2021


Author: Fraser Cormack
Date: 2021-06-24T10:12:51+01:00
New Revision: a4729f7f88097f9f1afb9a7294ee035888e7f687

URL: https://github.com/llvm/llvm-project/commit/a4729f7f88097f9f1afb9a7294ee035888e7f687
DIFF: https://github.com/llvm/llvm-project/commit/a4729f7f88097f9f1afb9a7294ee035888e7f687.diff

LOG: [RISCV] Lower RVV vector SELECTs to VSELECTs

This patch optimizes the code generation of vector-typed SELECTs (LLVM
select instructions with scalar conditions) by custom-lowering them to
VSELECTs (select instructions with vector conditions): the scalar
condition is splatted into a vector and used as a mask. This avoids the
default expansion path, which would either introduce control flow or
fully scalarize the operation.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D104772

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll
    llvm/test/CodeGen/RISCV/rvv/select-fp.ll
    llvm/test/CodeGen/RISCV/rvv/select-int.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 32046a452d94a..8a2e96d44399a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -451,7 +451,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 
-      setOperationAction(ISD::SELECT, VT, Expand);
+      setOperationAction(ISD::SELECT, VT, Custom);
       setOperationAction(ISD::SELECT_CC, VT, Expand);
       setOperationAction(ISD::VSELECT, VT, Expand);
 
@@ -538,7 +538,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
 
-      setOperationAction(ISD::SELECT, VT, Expand);
+      setOperationAction(ISD::SELECT, VT, Custom);
       setOperationAction(ISD::SELECT_CC, VT, Expand);
 
       setOperationAction(ISD::STEP_VECTOR, VT, Custom);
@@ -598,7 +598,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::MGATHER, VT, Custom);
       setOperationAction(ISD::MSCATTER, VT, Custom);
 
-      setOperationAction(ISD::SELECT, VT, Expand);
+      setOperationAction(ISD::SELECT, VT, Custom);
       setOperationAction(ISD::SELECT_CC, VT, Expand);
 
       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
@@ -668,6 +668,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
         setOperationAction(ISD::SETCC, VT, Custom);
 
+        setOperationAction(ISD::SELECT, VT, Custom);
+
         setOperationAction(ISD::TRUNCATE, VT, Custom);
 
         setOperationAction(ISD::BITCAST, VT, Custom);
@@ -729,7 +731,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         setOperationAction(ISD::MULHU, VT, Custom);
 
         setOperationAction(ISD::VSELECT, VT, Custom);
-        setOperationAction(ISD::SELECT, VT, Expand);
         setOperationAction(ISD::SELECT_CC, VT, Expand);
 
         setOperationAction(ISD::ANY_EXTEND, VT, Custom);
@@ -794,7 +795,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
           setCondCodeAction(CC, VT, Expand);
 
         setOperationAction(ISD::VSELECT, VT, Custom);
-        setOperationAction(ISD::SELECT, VT, Expand);
+        setOperationAction(ISD::SELECT, VT, Custom);
         setOperationAction(ISD::SELECT_CC, VT, Expand);
 
         setOperationAction(ISD::BITCAST, VT, Custom);
@@ -2736,19 +2737,29 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   SDValue TrueV = Op.getOperand(1);
   SDValue FalseV = Op.getOperand(2);
   SDLoc DL(Op);
+  MVT VT = Op.getSimpleValueType();
   MVT XLenVT = Subtarget.getXLenVT();
 
+  // Lower vector SELECTs to VSELECTs by splatting the condition.
+  if (VT.isVector()) {
+    MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
+    SDValue CondSplat = VT.isScalableVector()
+                            ? DAG.getSplatVector(SplatCondVT, DL, CondV)
+                            : DAG.getSplatBuildVector(SplatCondVT, DL, CondV);
+    return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
+  }
+
   // If the result type is XLenVT and CondV is the output of a SETCC node
   // which also operated on XLenVT inputs, then merge the SETCC node into the
   // lowered RISCVISD::SELECT_CC to take advantage of the integer
   // compare+branch instructions. i.e.:
   // (select (setcc lhs, rhs, cc), truev, falsev)
   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
-  if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
+  if (VT == XLenVT && CondV.getOpcode() == ISD::SETCC &&
       CondV.getOperand(0).getSimpleValueType() == XLenVT) {
     SDValue LHS = CondV.getOperand(0);
     SDValue RHS = CondV.getOperand(1);
-    auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
+    const auto *CC = cast<CondCodeSDNode>(CondV.getOperand(2));
     ISD::CondCode CCVal = CC->get();
 
     // Special case for a select of 2 constants that have a difference of 1.

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
index c027242dce732..7a3bdb4081313 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
@@ -1,30 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 define <2 x half> @select_v2f16(i1 zeroext %c, <2 x half> %a, <2 x half> %b) {
 ; CHECK-LABEL: select_v2f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    bnez a0, .LBB0_2
-; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmv.f.s ft0, v9
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v9, 1
-; CHECK-NEXT:    j .LBB0_3
-; CHECK-NEXT:  .LBB0_2:
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v8, 1
-; CHECK-NEXT:  .LBB0_3:
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmv.v.f v8, ft1
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmv.s.f v8, ft0
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <2 x half> %a, <2 x half> %b
   ret <2 x half> %v
@@ -34,28 +21,11 @@ define <2 x half> @selectcc_v2f16(half %a, half %b, <2 x half> %c, <2 x half> %d
 ; CHECK-LABEL: selectcc_v2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    feq.h a0, fa0, fa1
-; CHECK-NEXT:    bnez a0, .LBB1_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v9, 1
-; CHECK-NEXT:    j .LBB1_3
-; CHECK-NEXT:  .LBB1_2:
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v8, 1
-; CHECK-NEXT:  .LBB1_3:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmv.v.f v25, ft0
-; CHECK-NEXT:    bnez a0, .LBB1_5
-; CHECK-NEXT:  # %bb.4:
-; CHECK-NEXT:    vfmv.f.s ft0, v9
-; CHECK-NEXT:    j .LBB1_6
-; CHECK-NEXT:  .LBB1_5:
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:  .LBB1_6:
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmv.s.f v25, ft0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = fcmp oeq half %a, %b
   %v = select i1 %cmp, <2 x half> %c, <2 x half> %d
@@ -65,51 +35,11 @@ define <2 x half> @selectcc_v2f16(half %a, half %b, <2 x half> %c, <2 x half> %d
 define <4 x half> @select_v4f16(i1 zeroext %c, <4 x half> %a, <4 x half> %b) {
 ; CHECK-LABEL: select_v4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    bnez a0, .LBB2_3
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmv.f.s ft0, v9
-; CHECK-NEXT:    fsh ft0, 8(sp)
-; CHECK-NEXT:    beqz a0, .LBB2_4
-; CHECK-NEXT:  .LBB2_2:
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v8, 3
-; CHECK-NEXT:    j .LBB2_5
-; CHECK-NEXT:  .LBB2_3:
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    fsh ft0, 8(sp)
-; CHECK-NEXT:    bnez a0, .LBB2_2
-; CHECK-NEXT:  .LBB2_4:
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v9, 3
-; CHECK-NEXT:  .LBB2_5:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsh ft0, 14(sp)
-; CHECK-NEXT:    bnez a0, .LBB2_7
-; CHECK-NEXT:  # %bb.6:
-; CHECK-NEXT:    vslidedown.vi v25, v9, 2
-; CHECK-NEXT:    j .LBB2_8
-; CHECK-NEXT:  .LBB2_7:
-; CHECK-NEXT:    vslidedown.vi v25, v8, 2
-; CHECK-NEXT:  .LBB2_8:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsh ft0, 12(sp)
-; CHECK-NEXT:    bnez a0, .LBB2_10
-; CHECK-NEXT:  # %bb.9:
-; CHECK-NEXT:    vslidedown.vi v25, v9, 1
-; CHECK-NEXT:    j .LBB2_11
-; CHECK-NEXT:  .LBB2_10:
-; CHECK-NEXT:    vslidedown.vi v25, v8, 1
-; CHECK-NEXT:  .LBB2_11:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsh ft0, 10(sp)
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT:    addi a0, sp, 8
-; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <4 x half> %a, <4 x half> %b
   ret <4 x half> %v
@@ -118,52 +48,12 @@ define <4 x half> @select_v4f16(i1 zeroext %c, <4 x half> %a, <4 x half> %b) {
 define <4 x half> @selectcc_v4f16(half %a, half %b, <4 x half> %c, <4 x half> %d) {
 ; CHECK-LABEL: selectcc_v4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    feq.h a0, fa0, fa1
-; CHECK-NEXT:    bnez a0, .LBB3_3
-; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmv.f.s ft0, v9
-; CHECK-NEXT:    fsh ft0, 8(sp)
-; CHECK-NEXT:    beqz a0, .LBB3_4
-; CHECK-NEXT:  .LBB3_2:
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v8, 3
-; CHECK-NEXT:    j .LBB3_5
-; CHECK-NEXT:  .LBB3_3:
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    fsh ft0, 8(sp)
-; CHECK-NEXT:    bnez a0, .LBB3_2
-; CHECK-NEXT:  .LBB3_4:
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v9, 3
-; CHECK-NEXT:  .LBB3_5:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsh ft0, 14(sp)
-; CHECK-NEXT:    bnez a0, .LBB3_7
-; CHECK-NEXT:  # %bb.6:
-; CHECK-NEXT:    vslidedown.vi v25, v9, 2
-; CHECK-NEXT:    j .LBB3_8
-; CHECK-NEXT:  .LBB3_7:
-; CHECK-NEXT:    vslidedown.vi v25, v8, 2
-; CHECK-NEXT:  .LBB3_8:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsh ft0, 12(sp)
-; CHECK-NEXT:    bnez a0, .LBB3_10
-; CHECK-NEXT:  # %bb.9:
-; CHECK-NEXT:    vslidedown.vi v25, v9, 1
-; CHECK-NEXT:    j .LBB3_11
-; CHECK-NEXT:  .LBB3_10:
-; CHECK-NEXT:    vslidedown.vi v25, v8, 1
-; CHECK-NEXT:  .LBB3_11:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsh ft0, 10(sp)
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT:    addi a0, sp, 8
-; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = fcmp oeq half %a, %b
   %v = select i1 %cmp, <4 x half> %c, <4 x half> %d
@@ -173,87 +63,11 @@ define <4 x half> @selectcc_v4f16(half %a, half %b, <4 x half> %c, <4 x half> %d
 define <8 x half> @select_v8f16(i1 zeroext %c, <8 x half> %a, <8 x half> %b) {
 ; CHECK-LABEL: select_v8f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    bnez a0, .LBB4_3
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vfmv.f.s ft0, v9
-; CHECK-NEXT:    fsh ft0, 16(sp)
-; CHECK-NEXT:    beqz a0, .LBB4_4
-; CHECK-NEXT:  .LBB4_2:
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v8, 7
-; CHECK-NEXT:    j .LBB4_5
-; CHECK-NEXT:  .LBB4_3:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    fsh ft0, 16(sp)
-; CHECK-NEXT:    bnez a0, .LBB4_2
-; CHECK-NEXT:  .LBB4_4:
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v9, 7
-; CHECK-NEXT:  .LBB4_5:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsh ft0, 30(sp)
-; CHECK-NEXT:    bnez a0, .LBB4_7
-; CHECK-NEXT:  # %bb.6:
-; CHECK-NEXT:    vslidedown.vi v25, v9, 6
-; CHECK-NEXT:    j .LBB4_8
-; CHECK-NEXT:  .LBB4_7:
-; CHECK-NEXT:    vslidedown.vi v25, v8, 6
-; CHECK-NEXT:  .LBB4_8:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsh ft0, 28(sp)
-; CHECK-NEXT:    bnez a0, .LBB4_10
-; CHECK-NEXT:  # %bb.9:
-; CHECK-NEXT:    vslidedown.vi v25, v9, 5
-; CHECK-NEXT:    j .LBB4_11
-; CHECK-NEXT:  .LBB4_10:
-; CHECK-NEXT:    vslidedown.vi v25, v8, 5
-; CHECK-NEXT:  .LBB4_11:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsh ft0, 26(sp)
-; CHECK-NEXT:    bnez a0, .LBB4_13
-; CHECK-NEXT:  # %bb.12:
-; CHECK-NEXT:    vslidedown.vi v25, v9, 4
-; CHECK-NEXT:    j .LBB4_14
-; CHECK-NEXT:  .LBB4_13:
-; CHECK-NEXT:    vslidedown.vi v25, v8, 4
-; CHECK-NEXT:  .LBB4_14:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsh ft0, 24(sp)
-; CHECK-NEXT:    bnez a0, .LBB4_16
-; CHECK-NEXT:  # %bb.15:
-; CHECK-NEXT:    vslidedown.vi v25, v9, 3
-; CHECK-NEXT:    j .LBB4_17
-; CHECK-NEXT:  .LBB4_16:
-; CHECK-NEXT:    vslidedown.vi v25, v8, 3
-; CHECK-NEXT:  .LBB4_17:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsh ft0, 22(sp)
-; CHECK-NEXT:    bnez a0, .LBB4_19
-; CHECK-NEXT:  # %bb.18:
-; CHECK-NEXT:    vslidedown.vi v25, v9, 2
-; CHECK-NEXT:    j .LBB4_20
-; CHECK-NEXT:  .LBB4_19:
-; CHECK-NEXT:    vslidedown.vi v25, v8, 2
-; CHECK-NEXT:  .LBB4_20:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsh ft0, 20(sp)
-; CHECK-NEXT:    bnez a0, .LBB4_22
-; CHECK-NEXT:  # %bb.21:
-; CHECK-NEXT:    vslidedown.vi v25, v9, 1
-; CHECK-NEXT:    j .LBB4_23
-; CHECK-NEXT:  .LBB4_22:
-; CHECK-NEXT:    vslidedown.vi v25, v8, 1
-; CHECK-NEXT:  .LBB4_23:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsh ft0, 18(sp)
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    addi sp, sp, 32
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <8 x half> %a, <8 x half> %b
   ret <8 x half> %v
@@ -262,88 +76,12 @@ define <8 x half> @select_v8f16(i1 zeroext %c, <8 x half> %a, <8 x half> %b) {
 define <8 x half> @selectcc_v8f16(half %a, half %b, <8 x half> %c, <8 x half> %d) {
 ; CHECK-LABEL: selectcc_v8f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    feq.h a0, fa0, fa1
-; CHECK-NEXT:    bnez a0, .LBB5_3
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vfmv.f.s ft0, v9
-; CHECK-NEXT:    fsh ft0, 16(sp)
-; CHECK-NEXT:    beqz a0, .LBB5_4
-; CHECK-NEXT:  .LBB5_2:
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v8, 7
-; CHECK-NEXT:    j .LBB5_5
-; CHECK-NEXT:  .LBB5_3:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    fsh ft0, 16(sp)
-; CHECK-NEXT:    bnez a0, .LBB5_2
-; CHECK-NEXT:  .LBB5_4:
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v9, 7
-; CHECK-NEXT:  .LBB5_5:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsh ft0, 30(sp)
-; CHECK-NEXT:    bnez a0, .LBB5_7
-; CHECK-NEXT:  # %bb.6:
-; CHECK-NEXT:    vslidedown.vi v25, v9, 6
-; CHECK-NEXT:    j .LBB5_8
-; CHECK-NEXT:  .LBB5_7:
-; CHECK-NEXT:    vslidedown.vi v25, v8, 6
-; CHECK-NEXT:  .LBB5_8:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsh ft0, 28(sp)
-; CHECK-NEXT:    bnez a0, .LBB5_10
-; CHECK-NEXT:  # %bb.9:
-; CHECK-NEXT:    vslidedown.vi v25, v9, 5
-; CHECK-NEXT:    j .LBB5_11
-; CHECK-NEXT:  .LBB5_10:
-; CHECK-NEXT:    vslidedown.vi v25, v8, 5
-; CHECK-NEXT:  .LBB5_11:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsh ft0, 26(sp)
-; CHECK-NEXT:    bnez a0, .LBB5_13
-; CHECK-NEXT:  # %bb.12:
-; CHECK-NEXT:    vslidedown.vi v25, v9, 4
-; CHECK-NEXT:    j .LBB5_14
-; CHECK-NEXT:  .LBB5_13:
-; CHECK-NEXT:    vslidedown.vi v25, v8, 4
-; CHECK-NEXT:  .LBB5_14:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsh ft0, 24(sp)
-; CHECK-NEXT:    bnez a0, .LBB5_16
-; CHECK-NEXT:  # %bb.15:
-; CHECK-NEXT:    vslidedown.vi v25, v9, 3
-; CHECK-NEXT:    j .LBB5_17
-; CHECK-NEXT:  .LBB5_16:
-; CHECK-NEXT:    vslidedown.vi v25, v8, 3
-; CHECK-NEXT:  .LBB5_17:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsh ft0, 22(sp)
-; CHECK-NEXT:    bnez a0, .LBB5_19
-; CHECK-NEXT:  # %bb.18:
-; CHECK-NEXT:    vslidedown.vi v25, v9, 2
-; CHECK-NEXT:    j .LBB5_20
-; CHECK-NEXT:  .LBB5_19:
-; CHECK-NEXT:    vslidedown.vi v25, v8, 2
-; CHECK-NEXT:  .LBB5_20:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsh ft0, 20(sp)
-; CHECK-NEXT:    bnez a0, .LBB5_22
-; CHECK-NEXT:  # %bb.21:
-; CHECK-NEXT:    vslidedown.vi v25, v9, 1
-; CHECK-NEXT:    j .LBB5_23
-; CHECK-NEXT:  .LBB5_22:
-; CHECK-NEXT:    vslidedown.vi v25, v8, 1
-; CHECK-NEXT:  .LBB5_23:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsh ft0, 18(sp)
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    addi sp, sp, 32
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = fcmp oeq half %a, %b
   %v = select i1 %cmp, <8 x half> %c, <8 x half> %d
@@ -351,679 +89,28 @@ define <8 x half> @selectcc_v8f16(half %a, half %b, <8 x half> %c, <8 x half> %d
 }
 
 define <16 x half> @select_v16f16(i1 zeroext %c, <16 x half> %a, <16 x half> %b) {
-; RV32-LABEL: select_v16f16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -96
-; RV32-NEXT:    .cfi_def_cfa_offset 96
-; RV32-NEXT:    sw ra, 92(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 88(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    .cfi_offset s0, -8
-; RV32-NEXT:    addi s0, sp, 96
-; RV32-NEXT:    .cfi_def_cfa s0, 0
-; RV32-NEXT:    andi sp, sp, -32
-; RV32-NEXT:    bnez a0, .LBB6_3
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v10
-; RV32-NEXT:    fsh ft0, 32(sp)
-; RV32-NEXT:    beqz a0, .LBB6_4
-; RV32-NEXT:  .LBB6_2:
-; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, mu
-; RV32-NEXT:    vslidedown.vi v26, v8, 15
-; RV32-NEXT:    j .LBB6_5
-; RV32-NEXT:  .LBB6_3:
-; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    fsh ft0, 32(sp)
-; RV32-NEXT:    bnez a0, .LBB6_2
-; RV32-NEXT:  .LBB6_4:
-; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 15
-; RV32-NEXT:  .LBB6_5:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 62(sp)
-; RV32-NEXT:    bnez a0, .LBB6_7
-; RV32-NEXT:  # %bb.6:
-; RV32-NEXT:    vslidedown.vi v26, v10, 14
-; RV32-NEXT:    j .LBB6_8
-; RV32-NEXT:  .LBB6_7:
-; RV32-NEXT:    vslidedown.vi v26, v8, 14
-; RV32-NEXT:  .LBB6_8:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 60(sp)
-; RV32-NEXT:    bnez a0, .LBB6_10
-; RV32-NEXT:  # %bb.9:
-; RV32-NEXT:    vslidedown.vi v26, v10, 13
-; RV32-NEXT:    j .LBB6_11
-; RV32-NEXT:  .LBB6_10:
-; RV32-NEXT:    vslidedown.vi v26, v8, 13
-; RV32-NEXT:  .LBB6_11:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 58(sp)
-; RV32-NEXT:    bnez a0, .LBB6_13
-; RV32-NEXT:  # %bb.12:
-; RV32-NEXT:    vslidedown.vi v26, v10, 12
-; RV32-NEXT:    j .LBB6_14
-; RV32-NEXT:  .LBB6_13:
-; RV32-NEXT:    vslidedown.vi v26, v8, 12
-; RV32-NEXT:  .LBB6_14:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 56(sp)
-; RV32-NEXT:    bnez a0, .LBB6_16
-; RV32-NEXT:  # %bb.15:
-; RV32-NEXT:    vslidedown.vi v26, v10, 11
-; RV32-NEXT:    j .LBB6_17
-; RV32-NEXT:  .LBB6_16:
-; RV32-NEXT:    vslidedown.vi v26, v8, 11
-; RV32-NEXT:  .LBB6_17:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 54(sp)
-; RV32-NEXT:    bnez a0, .LBB6_19
-; RV32-NEXT:  # %bb.18:
-; RV32-NEXT:    vslidedown.vi v26, v10, 10
-; RV32-NEXT:    j .LBB6_20
-; RV32-NEXT:  .LBB6_19:
-; RV32-NEXT:    vslidedown.vi v26, v8, 10
-; RV32-NEXT:  .LBB6_20:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 52(sp)
-; RV32-NEXT:    bnez a0, .LBB6_22
-; RV32-NEXT:  # %bb.21:
-; RV32-NEXT:    vslidedown.vi v26, v10, 9
-; RV32-NEXT:    j .LBB6_23
-; RV32-NEXT:  .LBB6_22:
-; RV32-NEXT:    vslidedown.vi v26, v8, 9
-; RV32-NEXT:  .LBB6_23:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 50(sp)
-; RV32-NEXT:    bnez a0, .LBB6_25
-; RV32-NEXT:  # %bb.24:
-; RV32-NEXT:    vslidedown.vi v26, v10, 8
-; RV32-NEXT:    j .LBB6_26
-; RV32-NEXT:  .LBB6_25:
-; RV32-NEXT:    vslidedown.vi v26, v8, 8
-; RV32-NEXT:  .LBB6_26:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 48(sp)
-; RV32-NEXT:    bnez a0, .LBB6_28
-; RV32-NEXT:  # %bb.27:
-; RV32-NEXT:    vslidedown.vi v26, v10, 7
-; RV32-NEXT:    j .LBB6_29
-; RV32-NEXT:  .LBB6_28:
-; RV32-NEXT:    vslidedown.vi v26, v8, 7
-; RV32-NEXT:  .LBB6_29:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 46(sp)
-; RV32-NEXT:    bnez a0, .LBB6_31
-; RV32-NEXT:  # %bb.30:
-; RV32-NEXT:    vslidedown.vi v26, v10, 6
-; RV32-NEXT:    j .LBB6_32
-; RV32-NEXT:  .LBB6_31:
-; RV32-NEXT:    vslidedown.vi v26, v8, 6
-; RV32-NEXT:  .LBB6_32:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 44(sp)
-; RV32-NEXT:    bnez a0, .LBB6_34
-; RV32-NEXT:  # %bb.33:
-; RV32-NEXT:    vslidedown.vi v26, v10, 5
-; RV32-NEXT:    j .LBB6_35
-; RV32-NEXT:  .LBB6_34:
-; RV32-NEXT:    vslidedown.vi v26, v8, 5
-; RV32-NEXT:  .LBB6_35:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 42(sp)
-; RV32-NEXT:    bnez a0, .LBB6_37
-; RV32-NEXT:  # %bb.36:
-; RV32-NEXT:    vslidedown.vi v26, v10, 4
-; RV32-NEXT:    j .LBB6_38
-; RV32-NEXT:  .LBB6_37:
-; RV32-NEXT:    vslidedown.vi v26, v8, 4
-; RV32-NEXT:  .LBB6_38:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 40(sp)
-; RV32-NEXT:    bnez a0, .LBB6_40
-; RV32-NEXT:  # %bb.39:
-; RV32-NEXT:    vslidedown.vi v26, v10, 3
-; RV32-NEXT:    j .LBB6_41
-; RV32-NEXT:  .LBB6_40:
-; RV32-NEXT:    vslidedown.vi v26, v8, 3
-; RV32-NEXT:  .LBB6_41:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 38(sp)
-; RV32-NEXT:    bnez a0, .LBB6_43
-; RV32-NEXT:  # %bb.42:
-; RV32-NEXT:    vslidedown.vi v26, v10, 2
-; RV32-NEXT:    j .LBB6_44
-; RV32-NEXT:  .LBB6_43:
-; RV32-NEXT:    vslidedown.vi v26, v8, 2
-; RV32-NEXT:  .LBB6_44:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 36(sp)
-; RV32-NEXT:    bnez a0, .LBB6_46
-; RV32-NEXT:  # %bb.45:
-; RV32-NEXT:    vslidedown.vi v26, v10, 1
-; RV32-NEXT:    j .LBB6_47
-; RV32-NEXT:  .LBB6_46:
-; RV32-NEXT:    vslidedown.vi v26, v8, 1
-; RV32-NEXT:  .LBB6_47:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 34(sp)
-; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; RV32-NEXT:    addi a0, sp, 32
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    addi sp, s0, -96
-; RV32-NEXT:    lw s0, 88(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw ra, 92(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 96
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: select_v16f16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -96
-; RV64-NEXT:    .cfi_def_cfa_offset 96
-; RV64-NEXT:    sd ra, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    .cfi_offset ra, -8
-; RV64-NEXT:    .cfi_offset s0, -16
-; RV64-NEXT:    addi s0, sp, 96
-; RV64-NEXT:    .cfi_def_cfa s0, 0
-; RV64-NEXT:    andi sp, sp, -32
-; RV64-NEXT:    bnez a0, .LBB6_3
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v10
-; RV64-NEXT:    fsh ft0, 32(sp)
-; RV64-NEXT:    beqz a0, .LBB6_4
-; RV64-NEXT:  .LBB6_2:
-; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, mu
-; RV64-NEXT:    vslidedown.vi v26, v8, 15
-; RV64-NEXT:    j .LBB6_5
-; RV64-NEXT:  .LBB6_3:
-; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    fsh ft0, 32(sp)
-; RV64-NEXT:    bnez a0, .LBB6_2
-; RV64-NEXT:  .LBB6_4:
-; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, mu
-; RV64-NEXT:    vslidedown.vi v26, v10, 15
-; RV64-NEXT:  .LBB6_5:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 62(sp)
-; RV64-NEXT:    bnez a0, .LBB6_7
-; RV64-NEXT:  # %bb.6:
-; RV64-NEXT:    vslidedown.vi v26, v10, 14
-; RV64-NEXT:    j .LBB6_8
-; RV64-NEXT:  .LBB6_7:
-; RV64-NEXT:    vslidedown.vi v26, v8, 14
-; RV64-NEXT:  .LBB6_8:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 60(sp)
-; RV64-NEXT:    bnez a0, .LBB6_10
-; RV64-NEXT:  # %bb.9:
-; RV64-NEXT:    vslidedown.vi v26, v10, 13
-; RV64-NEXT:    j .LBB6_11
-; RV64-NEXT:  .LBB6_10:
-; RV64-NEXT:    vslidedown.vi v26, v8, 13
-; RV64-NEXT:  .LBB6_11:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 58(sp)
-; RV64-NEXT:    bnez a0, .LBB6_13
-; RV64-NEXT:  # %bb.12:
-; RV64-NEXT:    vslidedown.vi v26, v10, 12
-; RV64-NEXT:    j .LBB6_14
-; RV64-NEXT:  .LBB6_13:
-; RV64-NEXT:    vslidedown.vi v26, v8, 12
-; RV64-NEXT:  .LBB6_14:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 56(sp)
-; RV64-NEXT:    bnez a0, .LBB6_16
-; RV64-NEXT:  # %bb.15:
-; RV64-NEXT:    vslidedown.vi v26, v10, 11
-; RV64-NEXT:    j .LBB6_17
-; RV64-NEXT:  .LBB6_16:
-; RV64-NEXT:    vslidedown.vi v26, v8, 11
-; RV64-NEXT:  .LBB6_17:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 54(sp)
-; RV64-NEXT:    bnez a0, .LBB6_19
-; RV64-NEXT:  # %bb.18:
-; RV64-NEXT:    vslidedown.vi v26, v10, 10
-; RV64-NEXT:    j .LBB6_20
-; RV64-NEXT:  .LBB6_19:
-; RV64-NEXT:    vslidedown.vi v26, v8, 10
-; RV64-NEXT:  .LBB6_20:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 52(sp)
-; RV64-NEXT:    bnez a0, .LBB6_22
-; RV64-NEXT:  # %bb.21:
-; RV64-NEXT:    vslidedown.vi v26, v10, 9
-; RV64-NEXT:    j .LBB6_23
-; RV64-NEXT:  .LBB6_22:
-; RV64-NEXT:    vslidedown.vi v26, v8, 9
-; RV64-NEXT:  .LBB6_23:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 50(sp)
-; RV64-NEXT:    bnez a0, .LBB6_25
-; RV64-NEXT:  # %bb.24:
-; RV64-NEXT:    vslidedown.vi v26, v10, 8
-; RV64-NEXT:    j .LBB6_26
-; RV64-NEXT:  .LBB6_25:
-; RV64-NEXT:    vslidedown.vi v26, v8, 8
-; RV64-NEXT:  .LBB6_26:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 48(sp)
-; RV64-NEXT:    bnez a0, .LBB6_28
-; RV64-NEXT:  # %bb.27:
-; RV64-NEXT:    vslidedown.vi v26, v10, 7
-; RV64-NEXT:    j .LBB6_29
-; RV64-NEXT:  .LBB6_28:
-; RV64-NEXT:    vslidedown.vi v26, v8, 7
-; RV64-NEXT:  .LBB6_29:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 46(sp)
-; RV64-NEXT:    bnez a0, .LBB6_31
-; RV64-NEXT:  # %bb.30:
-; RV64-NEXT:    vslidedown.vi v26, v10, 6
-; RV64-NEXT:    j .LBB6_32
-; RV64-NEXT:  .LBB6_31:
-; RV64-NEXT:    vslidedown.vi v26, v8, 6
-; RV64-NEXT:  .LBB6_32:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 44(sp)
-; RV64-NEXT:    bnez a0, .LBB6_34
-; RV64-NEXT:  # %bb.33:
-; RV64-NEXT:    vslidedown.vi v26, v10, 5
-; RV64-NEXT:    j .LBB6_35
-; RV64-NEXT:  .LBB6_34:
-; RV64-NEXT:    vslidedown.vi v26, v8, 5
-; RV64-NEXT:  .LBB6_35:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 42(sp)
-; RV64-NEXT:    bnez a0, .LBB6_37
-; RV64-NEXT:  # %bb.36:
-; RV64-NEXT:    vslidedown.vi v26, v10, 4
-; RV64-NEXT:    j .LBB6_38
-; RV64-NEXT:  .LBB6_37:
-; RV64-NEXT:    vslidedown.vi v26, v8, 4
-; RV64-NEXT:  .LBB6_38:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 40(sp)
-; RV64-NEXT:    bnez a0, .LBB6_40
-; RV64-NEXT:  # %bb.39:
-; RV64-NEXT:    vslidedown.vi v26, v10, 3
-; RV64-NEXT:    j .LBB6_41
-; RV64-NEXT:  .LBB6_40:
-; RV64-NEXT:    vslidedown.vi v26, v8, 3
-; RV64-NEXT:  .LBB6_41:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 38(sp)
-; RV64-NEXT:    bnez a0, .LBB6_43
-; RV64-NEXT:  # %bb.42:
-; RV64-NEXT:    vslidedown.vi v26, v10, 2
-; RV64-NEXT:    j .LBB6_44
-; RV64-NEXT:  .LBB6_43:
-; RV64-NEXT:    vslidedown.vi v26, v8, 2
-; RV64-NEXT:  .LBB6_44:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 36(sp)
-; RV64-NEXT:    bnez a0, .LBB6_46
-; RV64-NEXT:  # %bb.45:
-; RV64-NEXT:    vslidedown.vi v26, v10, 1
-; RV64-NEXT:    j .LBB6_47
-; RV64-NEXT:  .LBB6_46:
-; RV64-NEXT:    vslidedown.vi v26, v8, 1
-; RV64-NEXT:  .LBB6_47:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 34(sp)
-; RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; RV64-NEXT:    addi a0, sp, 32
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    addi sp, s0, -96
-; RV64-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 96
-; RV64-NEXT:    ret
+; CHECK-LABEL: select_v16f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    ret
   %v = select i1 %c, <16 x half> %a, <16 x half> %b
   ret <16 x half> %v
 }
 
 define <16 x half> @selectcc_v16f16(half %a, half %b, <16 x half> %c, <16 x half> %d) {
-; RV32-LABEL: selectcc_v16f16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -96
-; RV32-NEXT:    .cfi_def_cfa_offset 96
-; RV32-NEXT:    sw ra, 92(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 88(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    .cfi_offset s0, -8
-; RV32-NEXT:    addi s0, sp, 96
-; RV32-NEXT:    .cfi_def_cfa s0, 0
-; RV32-NEXT:    andi sp, sp, -32
-; RV32-NEXT:    feq.h a0, fa0, fa1
-; RV32-NEXT:    bnez a0, .LBB7_3
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v10
-; RV32-NEXT:    fsh ft0, 32(sp)
-; RV32-NEXT:    beqz a0, .LBB7_4
-; RV32-NEXT:  .LBB7_2:
-; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, mu
-; RV32-NEXT:    vslidedown.vi v26, v8, 15
-; RV32-NEXT:    j .LBB7_5
-; RV32-NEXT:  .LBB7_3:
-; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    fsh ft0, 32(sp)
-; RV32-NEXT:    bnez a0, .LBB7_2
-; RV32-NEXT:  .LBB7_4:
-; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 15
-; RV32-NEXT:  .LBB7_5:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 62(sp)
-; RV32-NEXT:    bnez a0, .LBB7_7
-; RV32-NEXT:  # %bb.6:
-; RV32-NEXT:    vslidedown.vi v26, v10, 14
-; RV32-NEXT:    j .LBB7_8
-; RV32-NEXT:  .LBB7_7:
-; RV32-NEXT:    vslidedown.vi v26, v8, 14
-; RV32-NEXT:  .LBB7_8:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 60(sp)
-; RV32-NEXT:    bnez a0, .LBB7_10
-; RV32-NEXT:  # %bb.9:
-; RV32-NEXT:    vslidedown.vi v26, v10, 13
-; RV32-NEXT:    j .LBB7_11
-; RV32-NEXT:  .LBB7_10:
-; RV32-NEXT:    vslidedown.vi v26, v8, 13
-; RV32-NEXT:  .LBB7_11:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 58(sp)
-; RV32-NEXT:    bnez a0, .LBB7_13
-; RV32-NEXT:  # %bb.12:
-; RV32-NEXT:    vslidedown.vi v26, v10, 12
-; RV32-NEXT:    j .LBB7_14
-; RV32-NEXT:  .LBB7_13:
-; RV32-NEXT:    vslidedown.vi v26, v8, 12
-; RV32-NEXT:  .LBB7_14:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 56(sp)
-; RV32-NEXT:    bnez a0, .LBB7_16
-; RV32-NEXT:  # %bb.15:
-; RV32-NEXT:    vslidedown.vi v26, v10, 11
-; RV32-NEXT:    j .LBB7_17
-; RV32-NEXT:  .LBB7_16:
-; RV32-NEXT:    vslidedown.vi v26, v8, 11
-; RV32-NEXT:  .LBB7_17:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 54(sp)
-; RV32-NEXT:    bnez a0, .LBB7_19
-; RV32-NEXT:  # %bb.18:
-; RV32-NEXT:    vslidedown.vi v26, v10, 10
-; RV32-NEXT:    j .LBB7_20
-; RV32-NEXT:  .LBB7_19:
-; RV32-NEXT:    vslidedown.vi v26, v8, 10
-; RV32-NEXT:  .LBB7_20:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 52(sp)
-; RV32-NEXT:    bnez a0, .LBB7_22
-; RV32-NEXT:  # %bb.21:
-; RV32-NEXT:    vslidedown.vi v26, v10, 9
-; RV32-NEXT:    j .LBB7_23
-; RV32-NEXT:  .LBB7_22:
-; RV32-NEXT:    vslidedown.vi v26, v8, 9
-; RV32-NEXT:  .LBB7_23:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 50(sp)
-; RV32-NEXT:    bnez a0, .LBB7_25
-; RV32-NEXT:  # %bb.24:
-; RV32-NEXT:    vslidedown.vi v26, v10, 8
-; RV32-NEXT:    j .LBB7_26
-; RV32-NEXT:  .LBB7_25:
-; RV32-NEXT:    vslidedown.vi v26, v8, 8
-; RV32-NEXT:  .LBB7_26:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 48(sp)
-; RV32-NEXT:    bnez a0, .LBB7_28
-; RV32-NEXT:  # %bb.27:
-; RV32-NEXT:    vslidedown.vi v26, v10, 7
-; RV32-NEXT:    j .LBB7_29
-; RV32-NEXT:  .LBB7_28:
-; RV32-NEXT:    vslidedown.vi v26, v8, 7
-; RV32-NEXT:  .LBB7_29:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 46(sp)
-; RV32-NEXT:    bnez a0, .LBB7_31
-; RV32-NEXT:  # %bb.30:
-; RV32-NEXT:    vslidedown.vi v26, v10, 6
-; RV32-NEXT:    j .LBB7_32
-; RV32-NEXT:  .LBB7_31:
-; RV32-NEXT:    vslidedown.vi v26, v8, 6
-; RV32-NEXT:  .LBB7_32:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 44(sp)
-; RV32-NEXT:    bnez a0, .LBB7_34
-; RV32-NEXT:  # %bb.33:
-; RV32-NEXT:    vslidedown.vi v26, v10, 5
-; RV32-NEXT:    j .LBB7_35
-; RV32-NEXT:  .LBB7_34:
-; RV32-NEXT:    vslidedown.vi v26, v8, 5
-; RV32-NEXT:  .LBB7_35:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 42(sp)
-; RV32-NEXT:    bnez a0, .LBB7_37
-; RV32-NEXT:  # %bb.36:
-; RV32-NEXT:    vslidedown.vi v26, v10, 4
-; RV32-NEXT:    j .LBB7_38
-; RV32-NEXT:  .LBB7_37:
-; RV32-NEXT:    vslidedown.vi v26, v8, 4
-; RV32-NEXT:  .LBB7_38:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 40(sp)
-; RV32-NEXT:    bnez a0, .LBB7_40
-; RV32-NEXT:  # %bb.39:
-; RV32-NEXT:    vslidedown.vi v26, v10, 3
-; RV32-NEXT:    j .LBB7_41
-; RV32-NEXT:  .LBB7_40:
-; RV32-NEXT:    vslidedown.vi v26, v8, 3
-; RV32-NEXT:  .LBB7_41:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 38(sp)
-; RV32-NEXT:    bnez a0, .LBB7_43
-; RV32-NEXT:  # %bb.42:
-; RV32-NEXT:    vslidedown.vi v26, v10, 2
-; RV32-NEXT:    j .LBB7_44
-; RV32-NEXT:  .LBB7_43:
-; RV32-NEXT:    vslidedown.vi v26, v8, 2
-; RV32-NEXT:  .LBB7_44:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 36(sp)
-; RV32-NEXT:    bnez a0, .LBB7_46
-; RV32-NEXT:  # %bb.45:
-; RV32-NEXT:    vslidedown.vi v26, v10, 1
-; RV32-NEXT:    j .LBB7_47
-; RV32-NEXT:  .LBB7_46:
-; RV32-NEXT:    vslidedown.vi v26, v8, 1
-; RV32-NEXT:  .LBB7_47:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsh ft0, 34(sp)
-; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; RV32-NEXT:    addi a0, sp, 32
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    addi sp, s0, -96
-; RV32-NEXT:    lw s0, 88(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw ra, 92(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 96
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: selectcc_v16f16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -96
-; RV64-NEXT:    .cfi_def_cfa_offset 96
-; RV64-NEXT:    sd ra, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    .cfi_offset ra, -8
-; RV64-NEXT:    .cfi_offset s0, -16
-; RV64-NEXT:    addi s0, sp, 96
-; RV64-NEXT:    .cfi_def_cfa s0, 0
-; RV64-NEXT:    andi sp, sp, -32
-; RV64-NEXT:    feq.h a0, fa0, fa1
-; RV64-NEXT:    bnez a0, .LBB7_3
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v10
-; RV64-NEXT:    fsh ft0, 32(sp)
-; RV64-NEXT:    beqz a0, .LBB7_4
-; RV64-NEXT:  .LBB7_2:
-; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, mu
-; RV64-NEXT:    vslidedown.vi v26, v8, 15
-; RV64-NEXT:    j .LBB7_5
-; RV64-NEXT:  .LBB7_3:
-; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    fsh ft0, 32(sp)
-; RV64-NEXT:    bnez a0, .LBB7_2
-; RV64-NEXT:  .LBB7_4:
-; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, mu
-; RV64-NEXT:    vslidedown.vi v26, v10, 15
-; RV64-NEXT:  .LBB7_5:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 62(sp)
-; RV64-NEXT:    bnez a0, .LBB7_7
-; RV64-NEXT:  # %bb.6:
-; RV64-NEXT:    vslidedown.vi v26, v10, 14
-; RV64-NEXT:    j .LBB7_8
-; RV64-NEXT:  .LBB7_7:
-; RV64-NEXT:    vslidedown.vi v26, v8, 14
-; RV64-NEXT:  .LBB7_8:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 60(sp)
-; RV64-NEXT:    bnez a0, .LBB7_10
-; RV64-NEXT:  # %bb.9:
-; RV64-NEXT:    vslidedown.vi v26, v10, 13
-; RV64-NEXT:    j .LBB7_11
-; RV64-NEXT:  .LBB7_10:
-; RV64-NEXT:    vslidedown.vi v26, v8, 13
-; RV64-NEXT:  .LBB7_11:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 58(sp)
-; RV64-NEXT:    bnez a0, .LBB7_13
-; RV64-NEXT:  # %bb.12:
-; RV64-NEXT:    vslidedown.vi v26, v10, 12
-; RV64-NEXT:    j .LBB7_14
-; RV64-NEXT:  .LBB7_13:
-; RV64-NEXT:    vslidedown.vi v26, v8, 12
-; RV64-NEXT:  .LBB7_14:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 56(sp)
-; RV64-NEXT:    bnez a0, .LBB7_16
-; RV64-NEXT:  # %bb.15:
-; RV64-NEXT:    vslidedown.vi v26, v10, 11
-; RV64-NEXT:    j .LBB7_17
-; RV64-NEXT:  .LBB7_16:
-; RV64-NEXT:    vslidedown.vi v26, v8, 11
-; RV64-NEXT:  .LBB7_17:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 54(sp)
-; RV64-NEXT:    bnez a0, .LBB7_19
-; RV64-NEXT:  # %bb.18:
-; RV64-NEXT:    vslidedown.vi v26, v10, 10
-; RV64-NEXT:    j .LBB7_20
-; RV64-NEXT:  .LBB7_19:
-; RV64-NEXT:    vslidedown.vi v26, v8, 10
-; RV64-NEXT:  .LBB7_20:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 52(sp)
-; RV64-NEXT:    bnez a0, .LBB7_22
-; RV64-NEXT:  # %bb.21:
-; RV64-NEXT:    vslidedown.vi v26, v10, 9
-; RV64-NEXT:    j .LBB7_23
-; RV64-NEXT:  .LBB7_22:
-; RV64-NEXT:    vslidedown.vi v26, v8, 9
-; RV64-NEXT:  .LBB7_23:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 50(sp)
-; RV64-NEXT:    bnez a0, .LBB7_25
-; RV64-NEXT:  # %bb.24:
-; RV64-NEXT:    vslidedown.vi v26, v10, 8
-; RV64-NEXT:    j .LBB7_26
-; RV64-NEXT:  .LBB7_25:
-; RV64-NEXT:    vslidedown.vi v26, v8, 8
-; RV64-NEXT:  .LBB7_26:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 48(sp)
-; RV64-NEXT:    bnez a0, .LBB7_28
-; RV64-NEXT:  # %bb.27:
-; RV64-NEXT:    vslidedown.vi v26, v10, 7
-; RV64-NEXT:    j .LBB7_29
-; RV64-NEXT:  .LBB7_28:
-; RV64-NEXT:    vslidedown.vi v26, v8, 7
-; RV64-NEXT:  .LBB7_29:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 46(sp)
-; RV64-NEXT:    bnez a0, .LBB7_31
-; RV64-NEXT:  # %bb.30:
-; RV64-NEXT:    vslidedown.vi v26, v10, 6
-; RV64-NEXT:    j .LBB7_32
-; RV64-NEXT:  .LBB7_31:
-; RV64-NEXT:    vslidedown.vi v26, v8, 6
-; RV64-NEXT:  .LBB7_32:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 44(sp)
-; RV64-NEXT:    bnez a0, .LBB7_34
-; RV64-NEXT:  # %bb.33:
-; RV64-NEXT:    vslidedown.vi v26, v10, 5
-; RV64-NEXT:    j .LBB7_35
-; RV64-NEXT:  .LBB7_34:
-; RV64-NEXT:    vslidedown.vi v26, v8, 5
-; RV64-NEXT:  .LBB7_35:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 42(sp)
-; RV64-NEXT:    bnez a0, .LBB7_37
-; RV64-NEXT:  # %bb.36:
-; RV64-NEXT:    vslidedown.vi v26, v10, 4
-; RV64-NEXT:    j .LBB7_38
-; RV64-NEXT:  .LBB7_37:
-; RV64-NEXT:    vslidedown.vi v26, v8, 4
-; RV64-NEXT:  .LBB7_38:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 40(sp)
-; RV64-NEXT:    bnez a0, .LBB7_40
-; RV64-NEXT:  # %bb.39:
-; RV64-NEXT:    vslidedown.vi v26, v10, 3
-; RV64-NEXT:    j .LBB7_41
-; RV64-NEXT:  .LBB7_40:
-; RV64-NEXT:    vslidedown.vi v26, v8, 3
-; RV64-NEXT:  .LBB7_41:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 38(sp)
-; RV64-NEXT:    bnez a0, .LBB7_43
-; RV64-NEXT:  # %bb.42:
-; RV64-NEXT:    vslidedown.vi v26, v10, 2
-; RV64-NEXT:    j .LBB7_44
-; RV64-NEXT:  .LBB7_43:
-; RV64-NEXT:    vslidedown.vi v26, v8, 2
-; RV64-NEXT:  .LBB7_44:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 36(sp)
-; RV64-NEXT:    bnez a0, .LBB7_46
-; RV64-NEXT:  # %bb.45:
-; RV64-NEXT:    vslidedown.vi v26, v10, 1
-; RV64-NEXT:    j .LBB7_47
-; RV64-NEXT:  .LBB7_46:
-; RV64-NEXT:    vslidedown.vi v26, v8, 1
-; RV64-NEXT:  .LBB7_47:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsh ft0, 34(sp)
-; RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; RV64-NEXT:    addi a0, sp, 32
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    addi sp, s0, -96
-; RV64-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 96
-; RV64-NEXT:    ret
+; CHECK-LABEL: selectcc_v16f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    feq.h a0, fa0, fa1
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    ret
   %cmp = fcmp oeq half %a, %b
   %v = select i1 %cmp, <16 x half> %c, <16 x half> %d
   ret <16 x half> %v
@@ -1032,24 +119,11 @@ define <16 x half> @selectcc_v16f16(half %a, half %b, <16 x half> %c, <16 x half
 define <2 x float> @select_v2f32(i1 zeroext %c, <2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: select_v2f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    bnez a0, .LBB8_2
-; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmv.f.s ft0, v9
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v9, 1
-; CHECK-NEXT:    j .LBB8_3
-; CHECK-NEXT:  .LBB8_2:
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v8, 1
-; CHECK-NEXT:  .LBB8_3:
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmv.v.f v8, ft1
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmv.s.f v8, ft0
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <2 x float> %a, <2 x float> %b
   ret <2 x float> %v
@@ -1059,28 +133,11 @@ define <2 x float> @selectcc_v2f32(float %a, float %b, <2 x float> %c, <2 x floa
 ; CHECK-LABEL: selectcc_v2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    feq.s a0, fa0, fa1
-; CHECK-NEXT:    bnez a0, .LBB9_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v9, 1
-; CHECK-NEXT:    j .LBB9_3
-; CHECK-NEXT:  .LBB9_2:
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v8, 1
-; CHECK-NEXT:  .LBB9_3:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmv.v.f v25, ft0
-; CHECK-NEXT:    bnez a0, .LBB9_5
-; CHECK-NEXT:  # %bb.4:
-; CHECK-NEXT:    vfmv.f.s ft0, v9
-; CHECK-NEXT:    j .LBB9_6
-; CHECK-NEXT:  .LBB9_5:
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:  .LBB9_6:
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmv.s.f v25, ft0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = fcmp oeq float %a, %b
   %v = select i1 %cmp, <2 x float> %c, <2 x float> %d
@@ -1090,51 +147,11 @@ define <2 x float> @selectcc_v2f32(float %a, float %b, <2 x float> %c, <2 x floa
 define <4 x float> @select_v4f32(i1 zeroext %c, <4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: select_v4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    bnez a0, .LBB10_3
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfmv.f.s ft0, v9
-; CHECK-NEXT:    fsw ft0, 16(sp)
-; CHECK-NEXT:    beqz a0, .LBB10_4
-; CHECK-NEXT:  .LBB10_2:
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v8, 3
-; CHECK-NEXT:    j .LBB10_5
-; CHECK-NEXT:  .LBB10_3:
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    fsw ft0, 16(sp)
-; CHECK-NEXT:    bnez a0, .LBB10_2
-; CHECK-NEXT:  .LBB10_4:
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v9, 3
-; CHECK-NEXT:  .LBB10_5:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsw ft0, 28(sp)
-; CHECK-NEXT:    bnez a0, .LBB10_7
-; CHECK-NEXT:  # %bb.6:
-; CHECK-NEXT:    vslidedown.vi v25, v9, 2
-; CHECK-NEXT:    j .LBB10_8
-; CHECK-NEXT:  .LBB10_7:
-; CHECK-NEXT:    vslidedown.vi v25, v8, 2
-; CHECK-NEXT:  .LBB10_8:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsw ft0, 24(sp)
-; CHECK-NEXT:    bnez a0, .LBB10_10
-; CHECK-NEXT:  # %bb.9:
-; CHECK-NEXT:    vslidedown.vi v25, v9, 1
-; CHECK-NEXT:    j .LBB10_11
-; CHECK-NEXT:  .LBB10_10:
-; CHECK-NEXT:    vslidedown.vi v25, v8, 1
-; CHECK-NEXT:  .LBB10_11:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsw ft0, 20(sp)
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    addi sp, sp, 32
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <4 x float> %a, <4 x float> %b
   ret <4 x float> %v
@@ -1143,52 +160,12 @@ define <4 x float> @select_v4f32(i1 zeroext %c, <4 x float> %a, <4 x float> %b)
 define <4 x float> @selectcc_v4f32(float %a, float %b, <4 x float> %c, <4 x float> %d) {
 ; CHECK-LABEL: selectcc_v4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    feq.s a0, fa0, fa1
-; CHECK-NEXT:    bnez a0, .LBB11_3
-; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfmv.f.s ft0, v9
-; CHECK-NEXT:    fsw ft0, 16(sp)
-; CHECK-NEXT:    beqz a0, .LBB11_4
-; CHECK-NEXT:  .LBB11_2:
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v8, 3
-; CHECK-NEXT:    j .LBB11_5
-; CHECK-NEXT:  .LBB11_3:
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    fsw ft0, 16(sp)
-; CHECK-NEXT:    bnez a0, .LBB11_2
-; CHECK-NEXT:  .LBB11_4:
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v9, 3
-; CHECK-NEXT:  .LBB11_5:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsw ft0, 28(sp)
-; CHECK-NEXT:    bnez a0, .LBB11_7
-; CHECK-NEXT:  # %bb.6:
-; CHECK-NEXT:    vslidedown.vi v25, v9, 2
-; CHECK-NEXT:    j .LBB11_8
-; CHECK-NEXT:  .LBB11_7:
-; CHECK-NEXT:    vslidedown.vi v25, v8, 2
-; CHECK-NEXT:  .LBB11_8:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsw ft0, 24(sp)
-; CHECK-NEXT:    bnez a0, .LBB11_10
-; CHECK-NEXT:  # %bb.9:
-; CHECK-NEXT:    vslidedown.vi v25, v9, 1
-; CHECK-NEXT:    j .LBB11_11
-; CHECK-NEXT:  .LBB11_10:
-; CHECK-NEXT:    vslidedown.vi v25, v8, 1
-; CHECK-NEXT:  .LBB11_11:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    fsw ft0, 20(sp)
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    addi sp, sp, 32
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = fcmp oeq float %a, %b
   %v = select i1 %cmp, <4 x float> %c, <4 x float> %d
@@ -1196,1070 +173,56 @@ define <4 x float> @selectcc_v4f32(float %a, float %b, <4 x float> %c, <4 x floa
 }
 
 define <8 x float> @select_v8f32(i1 zeroext %c, <8 x float> %a, <8 x float> %b) {
-; RV32-LABEL: select_v8f32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -96
-; RV32-NEXT:    .cfi_def_cfa_offset 96
-; RV32-NEXT:    sw ra, 92(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 88(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    .cfi_offset s0, -8
-; RV32-NEXT:    addi s0, sp, 96
-; RV32-NEXT:    .cfi_def_cfa s0, 0
-; RV32-NEXT:    andi sp, sp, -32
-; RV32-NEXT:    bnez a0, .LBB12_3
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v10
-; RV32-NEXT:    fsw ft0, 32(sp)
-; RV32-NEXT:    beqz a0, .LBB12_4
-; RV32-NEXT:  .LBB12_2:
-; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
-; RV32-NEXT:    vslidedown.vi v26, v8, 7
-; RV32-NEXT:    j .LBB12_5
-; RV32-NEXT:  .LBB12_3:
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    fsw ft0, 32(sp)
-; RV32-NEXT:    bnez a0, .LBB12_2
-; RV32-NEXT:  .LBB12_4:
-; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 7
-; RV32-NEXT:  .LBB12_5:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsw ft0, 60(sp)
-; RV32-NEXT:    bnez a0, .LBB12_7
-; RV32-NEXT:  # %bb.6:
-; RV32-NEXT:    vslidedown.vi v26, v10, 6
-; RV32-NEXT:    j .LBB12_8
-; RV32-NEXT:  .LBB12_7:
-; RV32-NEXT:    vslidedown.vi v26, v8, 6
-; RV32-NEXT:  .LBB12_8:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsw ft0, 56(sp)
-; RV32-NEXT:    bnez a0, .LBB12_10
-; RV32-NEXT:  # %bb.9:
-; RV32-NEXT:    vslidedown.vi v26, v10, 5
-; RV32-NEXT:    j .LBB12_11
-; RV32-NEXT:  .LBB12_10:
-; RV32-NEXT:    vslidedown.vi v26, v8, 5
-; RV32-NEXT:  .LBB12_11:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsw ft0, 52(sp)
-; RV32-NEXT:    bnez a0, .LBB12_13
-; RV32-NEXT:  # %bb.12:
-; RV32-NEXT:    vslidedown.vi v26, v10, 4
-; RV32-NEXT:    j .LBB12_14
-; RV32-NEXT:  .LBB12_13:
-; RV32-NEXT:    vslidedown.vi v26, v8, 4
-; RV32-NEXT:  .LBB12_14:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsw ft0, 48(sp)
-; RV32-NEXT:    bnez a0, .LBB12_16
-; RV32-NEXT:  # %bb.15:
-; RV32-NEXT:    vslidedown.vi v26, v10, 3
-; RV32-NEXT:    j .LBB12_17
-; RV32-NEXT:  .LBB12_16:
-; RV32-NEXT:    vslidedown.vi v26, v8, 3
-; RV32-NEXT:  .LBB12_17:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsw ft0, 44(sp)
-; RV32-NEXT:    bnez a0, .LBB12_19
-; RV32-NEXT:  # %bb.18:
-; RV32-NEXT:    vslidedown.vi v26, v10, 2
-; RV32-NEXT:    j .LBB12_20
-; RV32-NEXT:  .LBB12_19:
-; RV32-NEXT:    vslidedown.vi v26, v8, 2
-; RV32-NEXT:  .LBB12_20:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsw ft0, 40(sp)
-; RV32-NEXT:    bnez a0, .LBB12_22
-; RV32-NEXT:  # %bb.21:
-; RV32-NEXT:    vslidedown.vi v26, v10, 1
-; RV32-NEXT:    j .LBB12_23
-; RV32-NEXT:  .LBB12_22:
-; RV32-NEXT:    vslidedown.vi v26, v8, 1
-; RV32-NEXT:  .LBB12_23:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsw ft0, 36(sp)
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; RV32-NEXT:    addi a0, sp, 32
-; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    addi sp, s0, -96
-; RV32-NEXT:    lw s0, 88(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw ra, 92(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 96
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: select_v8f32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -96
-; RV64-NEXT:    .cfi_def_cfa_offset 96
-; RV64-NEXT:    sd ra, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    .cfi_offset ra, -8
-; RV64-NEXT:    .cfi_offset s0, -16
-; RV64-NEXT:    addi s0, sp, 96
-; RV64-NEXT:    .cfi_def_cfa s0, 0
-; RV64-NEXT:    andi sp, sp, -32
-; RV64-NEXT:    bnez a0, .LBB12_3
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v10
-; RV64-NEXT:    fsw ft0, 32(sp)
-; RV64-NEXT:    beqz a0, .LBB12_4
-; RV64-NEXT:  .LBB12_2:
-; RV64-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
-; RV64-NEXT:    vslidedown.vi v26, v8, 7
-; RV64-NEXT:    j .LBB12_5
-; RV64-NEXT:  .LBB12_3:
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    fsw ft0, 32(sp)
-; RV64-NEXT:    bnez a0, .LBB12_2
-; RV64-NEXT:  .LBB12_4:
-; RV64-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
-; RV64-NEXT:    vslidedown.vi v26, v10, 7
-; RV64-NEXT:  .LBB12_5:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsw ft0, 60(sp)
-; RV64-NEXT:    bnez a0, .LBB12_7
-; RV64-NEXT:  # %bb.6:
-; RV64-NEXT:    vslidedown.vi v26, v10, 6
-; RV64-NEXT:    j .LBB12_8
-; RV64-NEXT:  .LBB12_7:
-; RV64-NEXT:    vslidedown.vi v26, v8, 6
-; RV64-NEXT:  .LBB12_8:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsw ft0, 56(sp)
-; RV64-NEXT:    bnez a0, .LBB12_10
-; RV64-NEXT:  # %bb.9:
-; RV64-NEXT:    vslidedown.vi v26, v10, 5
-; RV64-NEXT:    j .LBB12_11
-; RV64-NEXT:  .LBB12_10:
-; RV64-NEXT:    vslidedown.vi v26, v8, 5
-; RV64-NEXT:  .LBB12_11:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsw ft0, 52(sp)
-; RV64-NEXT:    bnez a0, .LBB12_13
-; RV64-NEXT:  # %bb.12:
-; RV64-NEXT:    vslidedown.vi v26, v10, 4
-; RV64-NEXT:    j .LBB12_14
-; RV64-NEXT:  .LBB12_13:
-; RV64-NEXT:    vslidedown.vi v26, v8, 4
-; RV64-NEXT:  .LBB12_14:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsw ft0, 48(sp)
-; RV64-NEXT:    bnez a0, .LBB12_16
-; RV64-NEXT:  # %bb.15:
-; RV64-NEXT:    vslidedown.vi v26, v10, 3
-; RV64-NEXT:    j .LBB12_17
-; RV64-NEXT:  .LBB12_16:
-; RV64-NEXT:    vslidedown.vi v26, v8, 3
-; RV64-NEXT:  .LBB12_17:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsw ft0, 44(sp)
-; RV64-NEXT:    bnez a0, .LBB12_19
-; RV64-NEXT:  # %bb.18:
-; RV64-NEXT:    vslidedown.vi v26, v10, 2
-; RV64-NEXT:    j .LBB12_20
-; RV64-NEXT:  .LBB12_19:
-; RV64-NEXT:    vslidedown.vi v26, v8, 2
-; RV64-NEXT:  .LBB12_20:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsw ft0, 40(sp)
-; RV64-NEXT:    bnez a0, .LBB12_22
-; RV64-NEXT:  # %bb.21:
-; RV64-NEXT:    vslidedown.vi v26, v10, 1
-; RV64-NEXT:    j .LBB12_23
-; RV64-NEXT:  .LBB12_22:
-; RV64-NEXT:    vslidedown.vi v26, v8, 1
-; RV64-NEXT:  .LBB12_23:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsw ft0, 36(sp)
-; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; RV64-NEXT:    addi a0, sp, 32
-; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    addi sp, s0, -96
-; RV64-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 96
-; RV64-NEXT:    ret
+; CHECK-LABEL: select_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    ret
   %v = select i1 %c, <8 x float> %a, <8 x float> %b
   ret <8 x float> %v
 }
 
 define <8 x float> @selectcc_v8f32(float %a, float %b, <8 x float> %c, <8 x float> %d) {
-; RV32-LABEL: selectcc_v8f32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -96
-; RV32-NEXT:    .cfi_def_cfa_offset 96
-; RV32-NEXT:    sw ra, 92(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 88(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    .cfi_offset s0, -8
-; RV32-NEXT:    addi s0, sp, 96
-; RV32-NEXT:    .cfi_def_cfa s0, 0
-; RV32-NEXT:    andi sp, sp, -32
-; RV32-NEXT:    feq.s a0, fa0, fa1
-; RV32-NEXT:    bnez a0, .LBB13_3
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v10
-; RV32-NEXT:    fsw ft0, 32(sp)
-; RV32-NEXT:    beqz a0, .LBB13_4
-; RV32-NEXT:  .LBB13_2:
-; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
-; RV32-NEXT:    vslidedown.vi v26, v8, 7
-; RV32-NEXT:    j .LBB13_5
-; RV32-NEXT:  .LBB13_3:
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    fsw ft0, 32(sp)
-; RV32-NEXT:    bnez a0, .LBB13_2
-; RV32-NEXT:  .LBB13_4:
-; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 7
-; RV32-NEXT:  .LBB13_5:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsw ft0, 60(sp)
-; RV32-NEXT:    bnez a0, .LBB13_7
-; RV32-NEXT:  # %bb.6:
-; RV32-NEXT:    vslidedown.vi v26, v10, 6
-; RV32-NEXT:    j .LBB13_8
-; RV32-NEXT:  .LBB13_7:
-; RV32-NEXT:    vslidedown.vi v26, v8, 6
-; RV32-NEXT:  .LBB13_8:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsw ft0, 56(sp)
-; RV32-NEXT:    bnez a0, .LBB13_10
-; RV32-NEXT:  # %bb.9:
-; RV32-NEXT:    vslidedown.vi v26, v10, 5
-; RV32-NEXT:    j .LBB13_11
-; RV32-NEXT:  .LBB13_10:
-; RV32-NEXT:    vslidedown.vi v26, v8, 5
-; RV32-NEXT:  .LBB13_11:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsw ft0, 52(sp)
-; RV32-NEXT:    bnez a0, .LBB13_13
-; RV32-NEXT:  # %bb.12:
-; RV32-NEXT:    vslidedown.vi v26, v10, 4
-; RV32-NEXT:    j .LBB13_14
-; RV32-NEXT:  .LBB13_13:
-; RV32-NEXT:    vslidedown.vi v26, v8, 4
-; RV32-NEXT:  .LBB13_14:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsw ft0, 48(sp)
-; RV32-NEXT:    bnez a0, .LBB13_16
-; RV32-NEXT:  # %bb.15:
-; RV32-NEXT:    vslidedown.vi v26, v10, 3
-; RV32-NEXT:    j .LBB13_17
-; RV32-NEXT:  .LBB13_16:
-; RV32-NEXT:    vslidedown.vi v26, v8, 3
-; RV32-NEXT:  .LBB13_17:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsw ft0, 44(sp)
-; RV32-NEXT:    bnez a0, .LBB13_19
-; RV32-NEXT:  # %bb.18:
-; RV32-NEXT:    vslidedown.vi v26, v10, 2
-; RV32-NEXT:    j .LBB13_20
-; RV32-NEXT:  .LBB13_19:
-; RV32-NEXT:    vslidedown.vi v26, v8, 2
-; RV32-NEXT:  .LBB13_20:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsw ft0, 40(sp)
-; RV32-NEXT:    bnez a0, .LBB13_22
-; RV32-NEXT:  # %bb.21:
-; RV32-NEXT:    vslidedown.vi v26, v10, 1
-; RV32-NEXT:    j .LBB13_23
-; RV32-NEXT:  .LBB13_22:
-; RV32-NEXT:    vslidedown.vi v26, v8, 1
-; RV32-NEXT:  .LBB13_23:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsw ft0, 36(sp)
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; RV32-NEXT:    addi a0, sp, 32
-; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    addi sp, s0, -96
-; RV32-NEXT:    lw s0, 88(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw ra, 92(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 96
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: selectcc_v8f32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -96
-; RV64-NEXT:    .cfi_def_cfa_offset 96
-; RV64-NEXT:    sd ra, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    .cfi_offset ra, -8
-; RV64-NEXT:    .cfi_offset s0, -16
-; RV64-NEXT:    addi s0, sp, 96
-; RV64-NEXT:    .cfi_def_cfa s0, 0
-; RV64-NEXT:    andi sp, sp, -32
-; RV64-NEXT:    feq.s a0, fa0, fa1
-; RV64-NEXT:    bnez a0, .LBB13_3
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v10
-; RV64-NEXT:    fsw ft0, 32(sp)
-; RV64-NEXT:    beqz a0, .LBB13_4
-; RV64-NEXT:  .LBB13_2:
-; RV64-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
-; RV64-NEXT:    vslidedown.vi v26, v8, 7
-; RV64-NEXT:    j .LBB13_5
-; RV64-NEXT:  .LBB13_3:
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    fsw ft0, 32(sp)
-; RV64-NEXT:    bnez a0, .LBB13_2
-; RV64-NEXT:  .LBB13_4:
-; RV64-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
-; RV64-NEXT:    vslidedown.vi v26, v10, 7
-; RV64-NEXT:  .LBB13_5:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsw ft0, 60(sp)
-; RV64-NEXT:    bnez a0, .LBB13_7
-; RV64-NEXT:  # %bb.6:
-; RV64-NEXT:    vslidedown.vi v26, v10, 6
-; RV64-NEXT:    j .LBB13_8
-; RV64-NEXT:  .LBB13_7:
-; RV64-NEXT:    vslidedown.vi v26, v8, 6
-; RV64-NEXT:  .LBB13_8:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsw ft0, 56(sp)
-; RV64-NEXT:    bnez a0, .LBB13_10
-; RV64-NEXT:  # %bb.9:
-; RV64-NEXT:    vslidedown.vi v26, v10, 5
-; RV64-NEXT:    j .LBB13_11
-; RV64-NEXT:  .LBB13_10:
-; RV64-NEXT:    vslidedown.vi v26, v8, 5
-; RV64-NEXT:  .LBB13_11:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsw ft0, 52(sp)
-; RV64-NEXT:    bnez a0, .LBB13_13
-; RV64-NEXT:  # %bb.12:
-; RV64-NEXT:    vslidedown.vi v26, v10, 4
-; RV64-NEXT:    j .LBB13_14
-; RV64-NEXT:  .LBB13_13:
-; RV64-NEXT:    vslidedown.vi v26, v8, 4
-; RV64-NEXT:  .LBB13_14:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsw ft0, 48(sp)
-; RV64-NEXT:    bnez a0, .LBB13_16
-; RV64-NEXT:  # %bb.15:
-; RV64-NEXT:    vslidedown.vi v26, v10, 3
-; RV64-NEXT:    j .LBB13_17
-; RV64-NEXT:  .LBB13_16:
-; RV64-NEXT:    vslidedown.vi v26, v8, 3
-; RV64-NEXT:  .LBB13_17:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsw ft0, 44(sp)
-; RV64-NEXT:    bnez a0, .LBB13_19
-; RV64-NEXT:  # %bb.18:
-; RV64-NEXT:    vslidedown.vi v26, v10, 2
-; RV64-NEXT:    j .LBB13_20
-; RV64-NEXT:  .LBB13_19:
-; RV64-NEXT:    vslidedown.vi v26, v8, 2
-; RV64-NEXT:  .LBB13_20:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsw ft0, 40(sp)
-; RV64-NEXT:    bnez a0, .LBB13_22
-; RV64-NEXT:  # %bb.21:
-; RV64-NEXT:    vslidedown.vi v26, v10, 1
-; RV64-NEXT:    j .LBB13_23
-; RV64-NEXT:  .LBB13_22:
-; RV64-NEXT:    vslidedown.vi v26, v8, 1
-; RV64-NEXT:  .LBB13_23:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsw ft0, 36(sp)
-; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; RV64-NEXT:    addi a0, sp, 32
-; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    addi sp, s0, -96
-; RV64-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 96
-; RV64-NEXT:    ret
+; CHECK-LABEL: selectcc_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    feq.s a0, fa0, fa1
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    ret
   %cmp = fcmp oeq float %a, %b
   %v = select i1 %cmp, <8 x float> %c, <8 x float> %d
   ret <8 x float> %v
 }
 
 define <16 x float> @select_v16f32(i1 zeroext %c, <16 x float> %a, <16 x float> %b) {
-; RV32-LABEL: select_v16f32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -192
-; RV32-NEXT:    .cfi_def_cfa_offset 192
-; RV32-NEXT:    sw ra, 188(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 184(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    .cfi_offset s0, -8
-; RV32-NEXT:    addi s0, sp, 192
-; RV32-NEXT:    .cfi_def_cfa s0, 0
-; RV32-NEXT:    andi sp, sp, -64
-; RV32-NEXT:    bnez a0, .LBB14_3
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v12
-; RV32-NEXT:    fsw ft0, 64(sp)
-; RV32-NEXT:    beqz a0, .LBB14_4
-; RV32-NEXT:  .LBB14_2:
-; RV32-NEXT:    vsetivli zero, 1, e32, m4, ta, mu
-; RV32-NEXT:    vslidedown.vi v28, v8, 15
-; RV32-NEXT:    j .LBB14_5
-; RV32-NEXT:  .LBB14_3:
-; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    fsw ft0, 64(sp)
-; RV32-NEXT:    bnez a0, .LBB14_2
-; RV32-NEXT:  .LBB14_4:
-; RV32-NEXT:    vsetivli zero, 1, e32, m4, ta, mu
-; RV32-NEXT:    vslidedown.vi v28, v12, 15
-; RV32-NEXT:  .LBB14_5:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 124(sp)
-; RV32-NEXT:    bnez a0, .LBB14_7
-; RV32-NEXT:  # %bb.6:
-; RV32-NEXT:    vslidedown.vi v28, v12, 14
-; RV32-NEXT:    j .LBB14_8
-; RV32-NEXT:  .LBB14_7:
-; RV32-NEXT:    vslidedown.vi v28, v8, 14
-; RV32-NEXT:  .LBB14_8:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 120(sp)
-; RV32-NEXT:    bnez a0, .LBB14_10
-; RV32-NEXT:  # %bb.9:
-; RV32-NEXT:    vslidedown.vi v28, v12, 13
-; RV32-NEXT:    j .LBB14_11
-; RV32-NEXT:  .LBB14_10:
-; RV32-NEXT:    vslidedown.vi v28, v8, 13
-; RV32-NEXT:  .LBB14_11:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 116(sp)
-; RV32-NEXT:    bnez a0, .LBB14_13
-; RV32-NEXT:  # %bb.12:
-; RV32-NEXT:    vslidedown.vi v28, v12, 12
-; RV32-NEXT:    j .LBB14_14
-; RV32-NEXT:  .LBB14_13:
-; RV32-NEXT:    vslidedown.vi v28, v8, 12
-; RV32-NEXT:  .LBB14_14:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 112(sp)
-; RV32-NEXT:    bnez a0, .LBB14_16
-; RV32-NEXT:  # %bb.15:
-; RV32-NEXT:    vslidedown.vi v28, v12, 11
-; RV32-NEXT:    j .LBB14_17
-; RV32-NEXT:  .LBB14_16:
-; RV32-NEXT:    vslidedown.vi v28, v8, 11
-; RV32-NEXT:  .LBB14_17:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 108(sp)
-; RV32-NEXT:    bnez a0, .LBB14_19
-; RV32-NEXT:  # %bb.18:
-; RV32-NEXT:    vslidedown.vi v28, v12, 10
-; RV32-NEXT:    j .LBB14_20
-; RV32-NEXT:  .LBB14_19:
-; RV32-NEXT:    vslidedown.vi v28, v8, 10
-; RV32-NEXT:  .LBB14_20:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 104(sp)
-; RV32-NEXT:    bnez a0, .LBB14_22
-; RV32-NEXT:  # %bb.21:
-; RV32-NEXT:    vslidedown.vi v28, v12, 9
-; RV32-NEXT:    j .LBB14_23
-; RV32-NEXT:  .LBB14_22:
-; RV32-NEXT:    vslidedown.vi v28, v8, 9
-; RV32-NEXT:  .LBB14_23:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 100(sp)
-; RV32-NEXT:    bnez a0, .LBB14_25
-; RV32-NEXT:  # %bb.24:
-; RV32-NEXT:    vslidedown.vi v28, v12, 8
-; RV32-NEXT:    j .LBB14_26
-; RV32-NEXT:  .LBB14_25:
-; RV32-NEXT:    vslidedown.vi v28, v8, 8
-; RV32-NEXT:  .LBB14_26:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 96(sp)
-; RV32-NEXT:    bnez a0, .LBB14_28
-; RV32-NEXT:  # %bb.27:
-; RV32-NEXT:    vslidedown.vi v28, v12, 7
-; RV32-NEXT:    j .LBB14_29
-; RV32-NEXT:  .LBB14_28:
-; RV32-NEXT:    vslidedown.vi v28, v8, 7
-; RV32-NEXT:  .LBB14_29:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 92(sp)
-; RV32-NEXT:    bnez a0, .LBB14_31
-; RV32-NEXT:  # %bb.30:
-; RV32-NEXT:    vslidedown.vi v28, v12, 6
-; RV32-NEXT:    j .LBB14_32
-; RV32-NEXT:  .LBB14_31:
-; RV32-NEXT:    vslidedown.vi v28, v8, 6
-; RV32-NEXT:  .LBB14_32:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 88(sp)
-; RV32-NEXT:    bnez a0, .LBB14_34
-; RV32-NEXT:  # %bb.33:
-; RV32-NEXT:    vslidedown.vi v28, v12, 5
-; RV32-NEXT:    j .LBB14_35
-; RV32-NEXT:  .LBB14_34:
-; RV32-NEXT:    vslidedown.vi v28, v8, 5
-; RV32-NEXT:  .LBB14_35:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 84(sp)
-; RV32-NEXT:    bnez a0, .LBB14_37
-; RV32-NEXT:  # %bb.36:
-; RV32-NEXT:    vslidedown.vi v28, v12, 4
-; RV32-NEXT:    j .LBB14_38
-; RV32-NEXT:  .LBB14_37:
-; RV32-NEXT:    vslidedown.vi v28, v8, 4
-; RV32-NEXT:  .LBB14_38:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 80(sp)
-; RV32-NEXT:    bnez a0, .LBB14_40
-; RV32-NEXT:  # %bb.39:
-; RV32-NEXT:    vslidedown.vi v28, v12, 3
-; RV32-NEXT:    j .LBB14_41
-; RV32-NEXT:  .LBB14_40:
-; RV32-NEXT:    vslidedown.vi v28, v8, 3
-; RV32-NEXT:  .LBB14_41:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 76(sp)
-; RV32-NEXT:    bnez a0, .LBB14_43
-; RV32-NEXT:  # %bb.42:
-; RV32-NEXT:    vslidedown.vi v28, v12, 2
-; RV32-NEXT:    j .LBB14_44
-; RV32-NEXT:  .LBB14_43:
-; RV32-NEXT:    vslidedown.vi v28, v8, 2
-; RV32-NEXT:  .LBB14_44:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 72(sp)
-; RV32-NEXT:    bnez a0, .LBB14_46
-; RV32-NEXT:  # %bb.45:
-; RV32-NEXT:    vslidedown.vi v28, v12, 1
-; RV32-NEXT:    j .LBB14_47
-; RV32-NEXT:  .LBB14_46:
-; RV32-NEXT:    vslidedown.vi v28, v8, 1
-; RV32-NEXT:  .LBB14_47:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 68(sp)
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; RV32-NEXT:    addi a0, sp, 64
-; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    addi sp, s0, -192
-; RV32-NEXT:    lw s0, 184(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw ra, 188(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 192
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: select_v16f32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -192
-; RV64-NEXT:    .cfi_def_cfa_offset 192
-; RV64-NEXT:    sd ra, 184(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 176(sp) # 8-byte Folded Spill
-; RV64-NEXT:    .cfi_offset ra, -8
-; RV64-NEXT:    .cfi_offset s0, -16
-; RV64-NEXT:    addi s0, sp, 192
-; RV64-NEXT:    .cfi_def_cfa s0, 0
-; RV64-NEXT:    andi sp, sp, -64
-; RV64-NEXT:    bnez a0, .LBB14_3
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v12
-; RV64-NEXT:    fsw ft0, 64(sp)
-; RV64-NEXT:    beqz a0, .LBB14_4
-; RV64-NEXT:  .LBB14_2:
-; RV64-NEXT:    vsetivli zero, 1, e32, m4, ta, mu
-; RV64-NEXT:    vslidedown.vi v28, v8, 15
-; RV64-NEXT:    j .LBB14_5
-; RV64-NEXT:  .LBB14_3:
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    fsw ft0, 64(sp)
-; RV64-NEXT:    bnez a0, .LBB14_2
-; RV64-NEXT:  .LBB14_4:
-; RV64-NEXT:    vsetivli zero, 1, e32, m4, ta, mu
-; RV64-NEXT:    vslidedown.vi v28, v12, 15
-; RV64-NEXT:  .LBB14_5:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 124(sp)
-; RV64-NEXT:    bnez a0, .LBB14_7
-; RV64-NEXT:  # %bb.6:
-; RV64-NEXT:    vslidedown.vi v28, v12, 14
-; RV64-NEXT:    j .LBB14_8
-; RV64-NEXT:  .LBB14_7:
-; RV64-NEXT:    vslidedown.vi v28, v8, 14
-; RV64-NEXT:  .LBB14_8:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 120(sp)
-; RV64-NEXT:    bnez a0, .LBB14_10
-; RV64-NEXT:  # %bb.9:
-; RV64-NEXT:    vslidedown.vi v28, v12, 13
-; RV64-NEXT:    j .LBB14_11
-; RV64-NEXT:  .LBB14_10:
-; RV64-NEXT:    vslidedown.vi v28, v8, 13
-; RV64-NEXT:  .LBB14_11:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 116(sp)
-; RV64-NEXT:    bnez a0, .LBB14_13
-; RV64-NEXT:  # %bb.12:
-; RV64-NEXT:    vslidedown.vi v28, v12, 12
-; RV64-NEXT:    j .LBB14_14
-; RV64-NEXT:  .LBB14_13:
-; RV64-NEXT:    vslidedown.vi v28, v8, 12
-; RV64-NEXT:  .LBB14_14:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 112(sp)
-; RV64-NEXT:    bnez a0, .LBB14_16
-; RV64-NEXT:  # %bb.15:
-; RV64-NEXT:    vslidedown.vi v28, v12, 11
-; RV64-NEXT:    j .LBB14_17
-; RV64-NEXT:  .LBB14_16:
-; RV64-NEXT:    vslidedown.vi v28, v8, 11
-; RV64-NEXT:  .LBB14_17:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 108(sp)
-; RV64-NEXT:    bnez a0, .LBB14_19
-; RV64-NEXT:  # %bb.18:
-; RV64-NEXT:    vslidedown.vi v28, v12, 10
-; RV64-NEXT:    j .LBB14_20
-; RV64-NEXT:  .LBB14_19:
-; RV64-NEXT:    vslidedown.vi v28, v8, 10
-; RV64-NEXT:  .LBB14_20:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 104(sp)
-; RV64-NEXT:    bnez a0, .LBB14_22
-; RV64-NEXT:  # %bb.21:
-; RV64-NEXT:    vslidedown.vi v28, v12, 9
-; RV64-NEXT:    j .LBB14_23
-; RV64-NEXT:  .LBB14_22:
-; RV64-NEXT:    vslidedown.vi v28, v8, 9
-; RV64-NEXT:  .LBB14_23:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 100(sp)
-; RV64-NEXT:    bnez a0, .LBB14_25
-; RV64-NEXT:  # %bb.24:
-; RV64-NEXT:    vslidedown.vi v28, v12, 8
-; RV64-NEXT:    j .LBB14_26
-; RV64-NEXT:  .LBB14_25:
-; RV64-NEXT:    vslidedown.vi v28, v8, 8
-; RV64-NEXT:  .LBB14_26:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 96(sp)
-; RV64-NEXT:    bnez a0, .LBB14_28
-; RV64-NEXT:  # %bb.27:
-; RV64-NEXT:    vslidedown.vi v28, v12, 7
-; RV64-NEXT:    j .LBB14_29
-; RV64-NEXT:  .LBB14_28:
-; RV64-NEXT:    vslidedown.vi v28, v8, 7
-; RV64-NEXT:  .LBB14_29:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 92(sp)
-; RV64-NEXT:    bnez a0, .LBB14_31
-; RV64-NEXT:  # %bb.30:
-; RV64-NEXT:    vslidedown.vi v28, v12, 6
-; RV64-NEXT:    j .LBB14_32
-; RV64-NEXT:  .LBB14_31:
-; RV64-NEXT:    vslidedown.vi v28, v8, 6
-; RV64-NEXT:  .LBB14_32:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 88(sp)
-; RV64-NEXT:    bnez a0, .LBB14_34
-; RV64-NEXT:  # %bb.33:
-; RV64-NEXT:    vslidedown.vi v28, v12, 5
-; RV64-NEXT:    j .LBB14_35
-; RV64-NEXT:  .LBB14_34:
-; RV64-NEXT:    vslidedown.vi v28, v8, 5
-; RV64-NEXT:  .LBB14_35:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 84(sp)
-; RV64-NEXT:    bnez a0, .LBB14_37
-; RV64-NEXT:  # %bb.36:
-; RV64-NEXT:    vslidedown.vi v28, v12, 4
-; RV64-NEXT:    j .LBB14_38
-; RV64-NEXT:  .LBB14_37:
-; RV64-NEXT:    vslidedown.vi v28, v8, 4
-; RV64-NEXT:  .LBB14_38:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 80(sp)
-; RV64-NEXT:    bnez a0, .LBB14_40
-; RV64-NEXT:  # %bb.39:
-; RV64-NEXT:    vslidedown.vi v28, v12, 3
-; RV64-NEXT:    j .LBB14_41
-; RV64-NEXT:  .LBB14_40:
-; RV64-NEXT:    vslidedown.vi v28, v8, 3
-; RV64-NEXT:  .LBB14_41:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 76(sp)
-; RV64-NEXT:    bnez a0, .LBB14_43
-; RV64-NEXT:  # %bb.42:
-; RV64-NEXT:    vslidedown.vi v28, v12, 2
-; RV64-NEXT:    j .LBB14_44
-; RV64-NEXT:  .LBB14_43:
-; RV64-NEXT:    vslidedown.vi v28, v8, 2
-; RV64-NEXT:  .LBB14_44:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 72(sp)
-; RV64-NEXT:    bnez a0, .LBB14_46
-; RV64-NEXT:  # %bb.45:
-; RV64-NEXT:    vslidedown.vi v28, v12, 1
-; RV64-NEXT:    j .LBB14_47
-; RV64-NEXT:  .LBB14_46:
-; RV64-NEXT:    vslidedown.vi v28, v8, 1
-; RV64-NEXT:  .LBB14_47:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 68(sp)
-; RV64-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; RV64-NEXT:    addi a0, sp, 64
-; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    addi sp, s0, -192
-; RV64-NEXT:    ld s0, 176(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld ra, 184(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 192
-; RV64-NEXT:    ret
+; CHECK-LABEL: select_v16f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT:    ret
   %v = select i1 %c, <16 x float> %a, <16 x float> %b
   ret <16 x float> %v
 }
 
 define <16 x float> @selectcc_v16f32(float %a, float %b, <16 x float> %c, <16 x float> %d) {
-; RV32-LABEL: selectcc_v16f32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -192
-; RV32-NEXT:    .cfi_def_cfa_offset 192
-; RV32-NEXT:    sw ra, 188(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 184(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    .cfi_offset s0, -8
-; RV32-NEXT:    addi s0, sp, 192
-; RV32-NEXT:    .cfi_def_cfa s0, 0
-; RV32-NEXT:    andi sp, sp, -64
-; RV32-NEXT:    feq.s a0, fa0, fa1
-; RV32-NEXT:    bnez a0, .LBB15_3
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v12
-; RV32-NEXT:    fsw ft0, 64(sp)
-; RV32-NEXT:    beqz a0, .LBB15_4
-; RV32-NEXT:  .LBB15_2:
-; RV32-NEXT:    vsetivli zero, 1, e32, m4, ta, mu
-; RV32-NEXT:    vslidedown.vi v28, v8, 15
-; RV32-NEXT:    j .LBB15_5
-; RV32-NEXT:  .LBB15_3:
-; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    fsw ft0, 64(sp)
-; RV32-NEXT:    bnez a0, .LBB15_2
-; RV32-NEXT:  .LBB15_4:
-; RV32-NEXT:    vsetivli zero, 1, e32, m4, ta, mu
-; RV32-NEXT:    vslidedown.vi v28, v12, 15
-; RV32-NEXT:  .LBB15_5:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 124(sp)
-; RV32-NEXT:    bnez a0, .LBB15_7
-; RV32-NEXT:  # %bb.6:
-; RV32-NEXT:    vslidedown.vi v28, v12, 14
-; RV32-NEXT:    j .LBB15_8
-; RV32-NEXT:  .LBB15_7:
-; RV32-NEXT:    vslidedown.vi v28, v8, 14
-; RV32-NEXT:  .LBB15_8:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 120(sp)
-; RV32-NEXT:    bnez a0, .LBB15_10
-; RV32-NEXT:  # %bb.9:
-; RV32-NEXT:    vslidedown.vi v28, v12, 13
-; RV32-NEXT:    j .LBB15_11
-; RV32-NEXT:  .LBB15_10:
-; RV32-NEXT:    vslidedown.vi v28, v8, 13
-; RV32-NEXT:  .LBB15_11:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 116(sp)
-; RV32-NEXT:    bnez a0, .LBB15_13
-; RV32-NEXT:  # %bb.12:
-; RV32-NEXT:    vslidedown.vi v28, v12, 12
-; RV32-NEXT:    j .LBB15_14
-; RV32-NEXT:  .LBB15_13:
-; RV32-NEXT:    vslidedown.vi v28, v8, 12
-; RV32-NEXT:  .LBB15_14:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 112(sp)
-; RV32-NEXT:    bnez a0, .LBB15_16
-; RV32-NEXT:  # %bb.15:
-; RV32-NEXT:    vslidedown.vi v28, v12, 11
-; RV32-NEXT:    j .LBB15_17
-; RV32-NEXT:  .LBB15_16:
-; RV32-NEXT:    vslidedown.vi v28, v8, 11
-; RV32-NEXT:  .LBB15_17:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 108(sp)
-; RV32-NEXT:    bnez a0, .LBB15_19
-; RV32-NEXT:  # %bb.18:
-; RV32-NEXT:    vslidedown.vi v28, v12, 10
-; RV32-NEXT:    j .LBB15_20
-; RV32-NEXT:  .LBB15_19:
-; RV32-NEXT:    vslidedown.vi v28, v8, 10
-; RV32-NEXT:  .LBB15_20:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 104(sp)
-; RV32-NEXT:    bnez a0, .LBB15_22
-; RV32-NEXT:  # %bb.21:
-; RV32-NEXT:    vslidedown.vi v28, v12, 9
-; RV32-NEXT:    j .LBB15_23
-; RV32-NEXT:  .LBB15_22:
-; RV32-NEXT:    vslidedown.vi v28, v8, 9
-; RV32-NEXT:  .LBB15_23:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 100(sp)
-; RV32-NEXT:    bnez a0, .LBB15_25
-; RV32-NEXT:  # %bb.24:
-; RV32-NEXT:    vslidedown.vi v28, v12, 8
-; RV32-NEXT:    j .LBB15_26
-; RV32-NEXT:  .LBB15_25:
-; RV32-NEXT:    vslidedown.vi v28, v8, 8
-; RV32-NEXT:  .LBB15_26:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 96(sp)
-; RV32-NEXT:    bnez a0, .LBB15_28
-; RV32-NEXT:  # %bb.27:
-; RV32-NEXT:    vslidedown.vi v28, v12, 7
-; RV32-NEXT:    j .LBB15_29
-; RV32-NEXT:  .LBB15_28:
-; RV32-NEXT:    vslidedown.vi v28, v8, 7
-; RV32-NEXT:  .LBB15_29:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 92(sp)
-; RV32-NEXT:    bnez a0, .LBB15_31
-; RV32-NEXT:  # %bb.30:
-; RV32-NEXT:    vslidedown.vi v28, v12, 6
-; RV32-NEXT:    j .LBB15_32
-; RV32-NEXT:  .LBB15_31:
-; RV32-NEXT:    vslidedown.vi v28, v8, 6
-; RV32-NEXT:  .LBB15_32:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 88(sp)
-; RV32-NEXT:    bnez a0, .LBB15_34
-; RV32-NEXT:  # %bb.33:
-; RV32-NEXT:    vslidedown.vi v28, v12, 5
-; RV32-NEXT:    j .LBB15_35
-; RV32-NEXT:  .LBB15_34:
-; RV32-NEXT:    vslidedown.vi v28, v8, 5
-; RV32-NEXT:  .LBB15_35:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 84(sp)
-; RV32-NEXT:    bnez a0, .LBB15_37
-; RV32-NEXT:  # %bb.36:
-; RV32-NEXT:    vslidedown.vi v28, v12, 4
-; RV32-NEXT:    j .LBB15_38
-; RV32-NEXT:  .LBB15_37:
-; RV32-NEXT:    vslidedown.vi v28, v8, 4
-; RV32-NEXT:  .LBB15_38:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 80(sp)
-; RV32-NEXT:    bnez a0, .LBB15_40
-; RV32-NEXT:  # %bb.39:
-; RV32-NEXT:    vslidedown.vi v28, v12, 3
-; RV32-NEXT:    j .LBB15_41
-; RV32-NEXT:  .LBB15_40:
-; RV32-NEXT:    vslidedown.vi v28, v8, 3
-; RV32-NEXT:  .LBB15_41:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 76(sp)
-; RV32-NEXT:    bnez a0, .LBB15_43
-; RV32-NEXT:  # %bb.42:
-; RV32-NEXT:    vslidedown.vi v28, v12, 2
-; RV32-NEXT:    j .LBB15_44
-; RV32-NEXT:  .LBB15_43:
-; RV32-NEXT:    vslidedown.vi v28, v8, 2
-; RV32-NEXT:  .LBB15_44:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 72(sp)
-; RV32-NEXT:    bnez a0, .LBB15_46
-; RV32-NEXT:  # %bb.45:
-; RV32-NEXT:    vslidedown.vi v28, v12, 1
-; RV32-NEXT:    j .LBB15_47
-; RV32-NEXT:  .LBB15_46:
-; RV32-NEXT:    vslidedown.vi v28, v8, 1
-; RV32-NEXT:  .LBB15_47:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsw ft0, 68(sp)
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; RV32-NEXT:    addi a0, sp, 64
-; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    addi sp, s0, -192
-; RV32-NEXT:    lw s0, 184(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw ra, 188(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 192
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: selectcc_v16f32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -192
-; RV64-NEXT:    .cfi_def_cfa_offset 192
-; RV64-NEXT:    sd ra, 184(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 176(sp) # 8-byte Folded Spill
-; RV64-NEXT:    .cfi_offset ra, -8
-; RV64-NEXT:    .cfi_offset s0, -16
-; RV64-NEXT:    addi s0, sp, 192
-; RV64-NEXT:    .cfi_def_cfa s0, 0
-; RV64-NEXT:    andi sp, sp, -64
-; RV64-NEXT:    feq.s a0, fa0, fa1
-; RV64-NEXT:    bnez a0, .LBB15_3
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v12
-; RV64-NEXT:    fsw ft0, 64(sp)
-; RV64-NEXT:    beqz a0, .LBB15_4
-; RV64-NEXT:  .LBB15_2:
-; RV64-NEXT:    vsetivli zero, 1, e32, m4, ta, mu
-; RV64-NEXT:    vslidedown.vi v28, v8, 15
-; RV64-NEXT:    j .LBB15_5
-; RV64-NEXT:  .LBB15_3:
-; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    fsw ft0, 64(sp)
-; RV64-NEXT:    bnez a0, .LBB15_2
-; RV64-NEXT:  .LBB15_4:
-; RV64-NEXT:    vsetivli zero, 1, e32, m4, ta, mu
-; RV64-NEXT:    vslidedown.vi v28, v12, 15
-; RV64-NEXT:  .LBB15_5:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 124(sp)
-; RV64-NEXT:    bnez a0, .LBB15_7
-; RV64-NEXT:  # %bb.6:
-; RV64-NEXT:    vslidedown.vi v28, v12, 14
-; RV64-NEXT:    j .LBB15_8
-; RV64-NEXT:  .LBB15_7:
-; RV64-NEXT:    vslidedown.vi v28, v8, 14
-; RV64-NEXT:  .LBB15_8:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 120(sp)
-; RV64-NEXT:    bnez a0, .LBB15_10
-; RV64-NEXT:  # %bb.9:
-; RV64-NEXT:    vslidedown.vi v28, v12, 13
-; RV64-NEXT:    j .LBB15_11
-; RV64-NEXT:  .LBB15_10:
-; RV64-NEXT:    vslidedown.vi v28, v8, 13
-; RV64-NEXT:  .LBB15_11:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 116(sp)
-; RV64-NEXT:    bnez a0, .LBB15_13
-; RV64-NEXT:  # %bb.12:
-; RV64-NEXT:    vslidedown.vi v28, v12, 12
-; RV64-NEXT:    j .LBB15_14
-; RV64-NEXT:  .LBB15_13:
-; RV64-NEXT:    vslidedown.vi v28, v8, 12
-; RV64-NEXT:  .LBB15_14:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 112(sp)
-; RV64-NEXT:    bnez a0, .LBB15_16
-; RV64-NEXT:  # %bb.15:
-; RV64-NEXT:    vslidedown.vi v28, v12, 11
-; RV64-NEXT:    j .LBB15_17
-; RV64-NEXT:  .LBB15_16:
-; RV64-NEXT:    vslidedown.vi v28, v8, 11
-; RV64-NEXT:  .LBB15_17:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 108(sp)
-; RV64-NEXT:    bnez a0, .LBB15_19
-; RV64-NEXT:  # %bb.18:
-; RV64-NEXT:    vslidedown.vi v28, v12, 10
-; RV64-NEXT:    j .LBB15_20
-; RV64-NEXT:  .LBB15_19:
-; RV64-NEXT:    vslidedown.vi v28, v8, 10
-; RV64-NEXT:  .LBB15_20:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 104(sp)
-; RV64-NEXT:    bnez a0, .LBB15_22
-; RV64-NEXT:  # %bb.21:
-; RV64-NEXT:    vslidedown.vi v28, v12, 9
-; RV64-NEXT:    j .LBB15_23
-; RV64-NEXT:  .LBB15_22:
-; RV64-NEXT:    vslidedown.vi v28, v8, 9
-; RV64-NEXT:  .LBB15_23:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 100(sp)
-; RV64-NEXT:    bnez a0, .LBB15_25
-; RV64-NEXT:  # %bb.24:
-; RV64-NEXT:    vslidedown.vi v28, v12, 8
-; RV64-NEXT:    j .LBB15_26
-; RV64-NEXT:  .LBB15_25:
-; RV64-NEXT:    vslidedown.vi v28, v8, 8
-; RV64-NEXT:  .LBB15_26:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 96(sp)
-; RV64-NEXT:    bnez a0, .LBB15_28
-; RV64-NEXT:  # %bb.27:
-; RV64-NEXT:    vslidedown.vi v28, v12, 7
-; RV64-NEXT:    j .LBB15_29
-; RV64-NEXT:  .LBB15_28:
-; RV64-NEXT:    vslidedown.vi v28, v8, 7
-; RV64-NEXT:  .LBB15_29:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 92(sp)
-; RV64-NEXT:    bnez a0, .LBB15_31
-; RV64-NEXT:  # %bb.30:
-; RV64-NEXT:    vslidedown.vi v28, v12, 6
-; RV64-NEXT:    j .LBB15_32
-; RV64-NEXT:  .LBB15_31:
-; RV64-NEXT:    vslidedown.vi v28, v8, 6
-; RV64-NEXT:  .LBB15_32:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 88(sp)
-; RV64-NEXT:    bnez a0, .LBB15_34
-; RV64-NEXT:  # %bb.33:
-; RV64-NEXT:    vslidedown.vi v28, v12, 5
-; RV64-NEXT:    j .LBB15_35
-; RV64-NEXT:  .LBB15_34:
-; RV64-NEXT:    vslidedown.vi v28, v8, 5
-; RV64-NEXT:  .LBB15_35:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 84(sp)
-; RV64-NEXT:    bnez a0, .LBB15_37
-; RV64-NEXT:  # %bb.36:
-; RV64-NEXT:    vslidedown.vi v28, v12, 4
-; RV64-NEXT:    j .LBB15_38
-; RV64-NEXT:  .LBB15_37:
-; RV64-NEXT:    vslidedown.vi v28, v8, 4
-; RV64-NEXT:  .LBB15_38:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 80(sp)
-; RV64-NEXT:    bnez a0, .LBB15_40
-; RV64-NEXT:  # %bb.39:
-; RV64-NEXT:    vslidedown.vi v28, v12, 3
-; RV64-NEXT:    j .LBB15_41
-; RV64-NEXT:  .LBB15_40:
-; RV64-NEXT:    vslidedown.vi v28, v8, 3
-; RV64-NEXT:  .LBB15_41:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 76(sp)
-; RV64-NEXT:    bnez a0, .LBB15_43
-; RV64-NEXT:  # %bb.42:
-; RV64-NEXT:    vslidedown.vi v28, v12, 2
-; RV64-NEXT:    j .LBB15_44
-; RV64-NEXT:  .LBB15_43:
-; RV64-NEXT:    vslidedown.vi v28, v8, 2
-; RV64-NEXT:  .LBB15_44:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 72(sp)
-; RV64-NEXT:    bnez a0, .LBB15_46
-; RV64-NEXT:  # %bb.45:
-; RV64-NEXT:    vslidedown.vi v28, v12, 1
-; RV64-NEXT:    j .LBB15_47
-; RV64-NEXT:  .LBB15_46:
-; RV64-NEXT:    vslidedown.vi v28, v8, 1
-; RV64-NEXT:  .LBB15_47:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsw ft0, 68(sp)
-; RV64-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; RV64-NEXT:    addi a0, sp, 64
-; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    addi sp, s0, -192
-; RV64-NEXT:    ld s0, 176(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld ra, 184(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 192
-; RV64-NEXT:    ret
+; CHECK-LABEL: selectcc_v16f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    feq.s a0, fa0, fa1
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT:    ret
   %cmp = fcmp oeq float %a, %b
   %v = select i1 %cmp, <16 x float> %c, <16 x float> %d
   ret <16 x float> %v
@@ -2268,24 +231,11 @@ define <16 x float> @selectcc_v16f32(float %a, float %b, <16 x float> %c, <16 x
 define <2 x double> @select_v2f64(i1 zeroext %c, <2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: select_v2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    bnez a0, .LBB16_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfmv.f.s ft0, v9
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v9, 1
-; CHECK-NEXT:    j .LBB16_3
-; CHECK-NEXT:  .LBB16_2:
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v8, 1
-; CHECK-NEXT:  .LBB16_3:
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; CHECK-NEXT:    vfmv.v.f v8, ft1
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, tu, mu
-; CHECK-NEXT:    vfmv.s.f v8, ft0
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <2 x double> %a, <2 x double> %b
   ret <2 x double> %v
@@ -2295,28 +245,11 @@ define <2 x double> @selectcc_v2f64(double %a, double %b, <2 x double> %c, <2 x
 ; CHECK-LABEL: selectcc_v2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    feq.d a0, fa0, fa1
-; CHECK-NEXT:    bnez a0, .LBB17_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v9, 1
-; CHECK-NEXT:    j .LBB17_3
-; CHECK-NEXT:  .LBB17_2:
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v8, 1
-; CHECK-NEXT:  .LBB17_3:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; CHECK-NEXT:    vfmv.v.f v25, ft0
-; CHECK-NEXT:    bnez a0, .LBB17_5
-; CHECK-NEXT:  # %bb.4:
-; CHECK-NEXT:    vfmv.f.s ft0, v9
-; CHECK-NEXT:    j .LBB17_6
-; CHECK-NEXT:  .LBB17_5:
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:  .LBB17_6:
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, tu, mu
-; CHECK-NEXT:    vfmv.s.f v25, ft0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = fcmp oeq double %a, %b
   %v = select i1 %cmp, <2 x double> %c, <2 x double> %d
@@ -2324,1317 +257,84 @@ define <2 x double> @selectcc_v2f64(double %a, double %b, <2 x double> %c, <2 x
 }
 
 define <4 x double> @select_v4f64(i1 zeroext %c, <4 x double> %a, <4 x double> %b) {
-; RV32-LABEL: select_v4f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -96
-; RV32-NEXT:    .cfi_def_cfa_offset 96
-; RV32-NEXT:    sw ra, 92(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 88(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    .cfi_offset s0, -8
-; RV32-NEXT:    addi s0, sp, 96
-; RV32-NEXT:    .cfi_def_cfa s0, 0
-; RV32-NEXT:    andi sp, sp, -32
-; RV32-NEXT:    bnez a0, .LBB18_3
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v10
-; RV32-NEXT:    fsd ft0, 32(sp)
-; RV32-NEXT:    beqz a0, .LBB18_4
-; RV32-NEXT:  .LBB18_2:
-; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, mu
-; RV32-NEXT:    vslidedown.vi v26, v8, 3
-; RV32-NEXT:    j .LBB18_5
-; RV32-NEXT:  .LBB18_3:
-; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    fsd ft0, 32(sp)
-; RV32-NEXT:    bnez a0, .LBB18_2
-; RV32-NEXT:  .LBB18_4:
-; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 3
-; RV32-NEXT:  .LBB18_5:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsd ft0, 56(sp)
-; RV32-NEXT:    bnez a0, .LBB18_7
-; RV32-NEXT:  # %bb.6:
-; RV32-NEXT:    vslidedown.vi v26, v10, 2
-; RV32-NEXT:    j .LBB18_8
-; RV32-NEXT:  .LBB18_7:
-; RV32-NEXT:    vslidedown.vi v26, v8, 2
-; RV32-NEXT:  .LBB18_8:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsd ft0, 48(sp)
-; RV32-NEXT:    bnez a0, .LBB18_10
-; RV32-NEXT:  # %bb.9:
-; RV32-NEXT:    vslidedown.vi v26, v10, 1
-; RV32-NEXT:    j .LBB18_11
-; RV32-NEXT:  .LBB18_10:
-; RV32-NEXT:    vslidedown.vi v26, v8, 1
-; RV32-NEXT:  .LBB18_11:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsd ft0, 40(sp)
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; RV32-NEXT:    addi a0, sp, 32
-; RV32-NEXT:    vle64.v v8, (a0)
-; RV32-NEXT:    addi sp, s0, -96
-; RV32-NEXT:    lw s0, 88(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw ra, 92(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 96
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: select_v4f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -96
-; RV64-NEXT:    .cfi_def_cfa_offset 96
-; RV64-NEXT:    sd ra, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    .cfi_offset ra, -8
-; RV64-NEXT:    .cfi_offset s0, -16
-; RV64-NEXT:    addi s0, sp, 96
-; RV64-NEXT:    .cfi_def_cfa s0, 0
-; RV64-NEXT:    andi sp, sp, -32
-; RV64-NEXT:    bnez a0, .LBB18_3
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v10
-; RV64-NEXT:    fsd ft0, 32(sp)
-; RV64-NEXT:    beqz a0, .LBB18_4
-; RV64-NEXT:  .LBB18_2:
-; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, mu
-; RV64-NEXT:    vslidedown.vi v26, v8, 3
-; RV64-NEXT:    j .LBB18_5
-; RV64-NEXT:  .LBB18_3:
-; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    fsd ft0, 32(sp)
-; RV64-NEXT:    bnez a0, .LBB18_2
-; RV64-NEXT:  .LBB18_4:
-; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, mu
-; RV64-NEXT:    vslidedown.vi v26, v10, 3
-; RV64-NEXT:  .LBB18_5:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsd ft0, 56(sp)
-; RV64-NEXT:    bnez a0, .LBB18_7
-; RV64-NEXT:  # %bb.6:
-; RV64-NEXT:    vslidedown.vi v26, v10, 2
-; RV64-NEXT:    j .LBB18_8
-; RV64-NEXT:  .LBB18_7:
-; RV64-NEXT:    vslidedown.vi v26, v8, 2
-; RV64-NEXT:  .LBB18_8:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsd ft0, 48(sp)
-; RV64-NEXT:    bnez a0, .LBB18_10
-; RV64-NEXT:  # %bb.9:
-; RV64-NEXT:    vslidedown.vi v26, v10, 1
-; RV64-NEXT:    j .LBB18_11
-; RV64-NEXT:  .LBB18_10:
-; RV64-NEXT:    vslidedown.vi v26, v8, 1
-; RV64-NEXT:  .LBB18_11:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsd ft0, 40(sp)
-; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; RV64-NEXT:    addi a0, sp, 32
-; RV64-NEXT:    vle64.v v8, (a0)
-; RV64-NEXT:    addi sp, s0, -96
-; RV64-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 96
-; RV64-NEXT:    ret
+; CHECK-LABEL: select_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    ret
   %v = select i1 %c, <4 x double> %a, <4 x double> %b
   ret <4 x double> %v
 }
 
 define <4 x double> @selectcc_v4f64(double %a, double %b, <4 x double> %c, <4 x double> %d) {
-; RV32-LABEL: selectcc_v4f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -96
-; RV32-NEXT:    .cfi_def_cfa_offset 96
-; RV32-NEXT:    sw ra, 92(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 88(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    .cfi_offset s0, -8
-; RV32-NEXT:    addi s0, sp, 96
-; RV32-NEXT:    .cfi_def_cfa s0, 0
-; RV32-NEXT:    andi sp, sp, -32
-; RV32-NEXT:    feq.d a0, fa0, fa1
-; RV32-NEXT:    bnez a0, .LBB19_3
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v10
-; RV32-NEXT:    fsd ft0, 32(sp)
-; RV32-NEXT:    beqz a0, .LBB19_4
-; RV32-NEXT:  .LBB19_2:
-; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, mu
-; RV32-NEXT:    vslidedown.vi v26, v8, 3
-; RV32-NEXT:    j .LBB19_5
-; RV32-NEXT:  .LBB19_3:
-; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    fsd ft0, 32(sp)
-; RV32-NEXT:    bnez a0, .LBB19_2
-; RV32-NEXT:  .LBB19_4:
-; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 3
-; RV32-NEXT:  .LBB19_5:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsd ft0, 56(sp)
-; RV32-NEXT:    bnez a0, .LBB19_7
-; RV32-NEXT:  # %bb.6:
-; RV32-NEXT:    vslidedown.vi v26, v10, 2
-; RV32-NEXT:    j .LBB19_8
-; RV32-NEXT:  .LBB19_7:
-; RV32-NEXT:    vslidedown.vi v26, v8, 2
-; RV32-NEXT:  .LBB19_8:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsd ft0, 48(sp)
-; RV32-NEXT:    bnez a0, .LBB19_10
-; RV32-NEXT:  # %bb.9:
-; RV32-NEXT:    vslidedown.vi v26, v10, 1
-; RV32-NEXT:    j .LBB19_11
-; RV32-NEXT:  .LBB19_10:
-; RV32-NEXT:    vslidedown.vi v26, v8, 1
-; RV32-NEXT:  .LBB19_11:
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    fsd ft0, 40(sp)
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; RV32-NEXT:    addi a0, sp, 32
-; RV32-NEXT:    vle64.v v8, (a0)
-; RV32-NEXT:    addi sp, s0, -96
-; RV32-NEXT:    lw s0, 88(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw ra, 92(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 96
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: selectcc_v4f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -96
-; RV64-NEXT:    .cfi_def_cfa_offset 96
-; RV64-NEXT:    sd ra, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    .cfi_offset ra, -8
-; RV64-NEXT:    .cfi_offset s0, -16
-; RV64-NEXT:    addi s0, sp, 96
-; RV64-NEXT:    .cfi_def_cfa s0, 0
-; RV64-NEXT:    andi sp, sp, -32
-; RV64-NEXT:    feq.d a0, fa0, fa1
-; RV64-NEXT:    bnez a0, .LBB19_3
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v10
-; RV64-NEXT:    fsd ft0, 32(sp)
-; RV64-NEXT:    beqz a0, .LBB19_4
-; RV64-NEXT:  .LBB19_2:
-; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, mu
-; RV64-NEXT:    vslidedown.vi v26, v8, 3
-; RV64-NEXT:    j .LBB19_5
-; RV64-NEXT:  .LBB19_3:
-; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    fsd ft0, 32(sp)
-; RV64-NEXT:    bnez a0, .LBB19_2
-; RV64-NEXT:  .LBB19_4:
-; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, mu
-; RV64-NEXT:    vslidedown.vi v26, v10, 3
-; RV64-NEXT:  .LBB19_5:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsd ft0, 56(sp)
-; RV64-NEXT:    bnez a0, .LBB19_7
-; RV64-NEXT:  # %bb.6:
-; RV64-NEXT:    vslidedown.vi v26, v10, 2
-; RV64-NEXT:    j .LBB19_8
-; RV64-NEXT:  .LBB19_7:
-; RV64-NEXT:    vslidedown.vi v26, v8, 2
-; RV64-NEXT:  .LBB19_8:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsd ft0, 48(sp)
-; RV64-NEXT:    bnez a0, .LBB19_10
-; RV64-NEXT:  # %bb.9:
-; RV64-NEXT:    vslidedown.vi v26, v10, 1
-; RV64-NEXT:    j .LBB19_11
-; RV64-NEXT:  .LBB19_10:
-; RV64-NEXT:    vslidedown.vi v26, v8, 1
-; RV64-NEXT:  .LBB19_11:
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    fsd ft0, 40(sp)
-; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; RV64-NEXT:    addi a0, sp, 32
-; RV64-NEXT:    vle64.v v8, (a0)
-; RV64-NEXT:    addi sp, s0, -96
-; RV64-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 96
-; RV64-NEXT:    ret
+; CHECK-LABEL: selectcc_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    feq.d a0, fa0, fa1
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    ret
   %cmp = fcmp oeq double %a, %b
   %v = select i1 %cmp, <4 x double> %c, <4 x double> %d
   ret <4 x double> %v
 }
 
 define <8 x double> @select_v8f64(i1 zeroext %c, <8 x double> %a, <8 x double> %b) {
-; RV32-LABEL: select_v8f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -192
-; RV32-NEXT:    .cfi_def_cfa_offset 192
-; RV32-NEXT:    sw ra, 188(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 184(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    .cfi_offset s0, -8
-; RV32-NEXT:    addi s0, sp, 192
-; RV32-NEXT:    .cfi_def_cfa s0, 0
-; RV32-NEXT:    andi sp, sp, -64
-; RV32-NEXT:    bnez a0, .LBB20_3
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v12
-; RV32-NEXT:    fsd ft0, 64(sp)
-; RV32-NEXT:    beqz a0, .LBB20_4
-; RV32-NEXT:  .LBB20_2:
-; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, mu
-; RV32-NEXT:    vslidedown.vi v28, v8, 7
-; RV32-NEXT:    j .LBB20_5
-; RV32-NEXT:  .LBB20_3:
-; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    fsd ft0, 64(sp)
-; RV32-NEXT:    bnez a0, .LBB20_2
-; RV32-NEXT:  .LBB20_4:
-; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, mu
-; RV32-NEXT:    vslidedown.vi v28, v12, 7
-; RV32-NEXT:  .LBB20_5:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsd ft0, 120(sp)
-; RV32-NEXT:    bnez a0, .LBB20_7
-; RV32-NEXT:  # %bb.6:
-; RV32-NEXT:    vslidedown.vi v28, v12, 6
-; RV32-NEXT:    j .LBB20_8
-; RV32-NEXT:  .LBB20_7:
-; RV32-NEXT:    vslidedown.vi v28, v8, 6
-; RV32-NEXT:  .LBB20_8:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsd ft0, 112(sp)
-; RV32-NEXT:    bnez a0, .LBB20_10
-; RV32-NEXT:  # %bb.9:
-; RV32-NEXT:    vslidedown.vi v28, v12, 5
-; RV32-NEXT:    j .LBB20_11
-; RV32-NEXT:  .LBB20_10:
-; RV32-NEXT:    vslidedown.vi v28, v8, 5
-; RV32-NEXT:  .LBB20_11:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsd ft0, 104(sp)
-; RV32-NEXT:    bnez a0, .LBB20_13
-; RV32-NEXT:  # %bb.12:
-; RV32-NEXT:    vslidedown.vi v28, v12, 4
-; RV32-NEXT:    j .LBB20_14
-; RV32-NEXT:  .LBB20_13:
-; RV32-NEXT:    vslidedown.vi v28, v8, 4
-; RV32-NEXT:  .LBB20_14:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsd ft0, 96(sp)
-; RV32-NEXT:    bnez a0, .LBB20_16
-; RV32-NEXT:  # %bb.15:
-; RV32-NEXT:    vslidedown.vi v28, v12, 3
-; RV32-NEXT:    j .LBB20_17
-; RV32-NEXT:  .LBB20_16:
-; RV32-NEXT:    vslidedown.vi v28, v8, 3
-; RV32-NEXT:  .LBB20_17:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsd ft0, 88(sp)
-; RV32-NEXT:    bnez a0, .LBB20_19
-; RV32-NEXT:  # %bb.18:
-; RV32-NEXT:    vslidedown.vi v28, v12, 2
-; RV32-NEXT:    j .LBB20_20
-; RV32-NEXT:  .LBB20_19:
-; RV32-NEXT:    vslidedown.vi v28, v8, 2
-; RV32-NEXT:  .LBB20_20:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsd ft0, 80(sp)
-; RV32-NEXT:    bnez a0, .LBB20_22
-; RV32-NEXT:  # %bb.21:
-; RV32-NEXT:    vslidedown.vi v28, v12, 1
-; RV32-NEXT:    j .LBB20_23
-; RV32-NEXT:  .LBB20_22:
-; RV32-NEXT:    vslidedown.vi v28, v8, 1
-; RV32-NEXT:  .LBB20_23:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsd ft0, 72(sp)
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; RV32-NEXT:    addi a0, sp, 64
-; RV32-NEXT:    vle64.v v8, (a0)
-; RV32-NEXT:    addi sp, s0, -192
-; RV32-NEXT:    lw s0, 184(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw ra, 188(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 192
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: select_v8f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -192
-; RV64-NEXT:    .cfi_def_cfa_offset 192
-; RV64-NEXT:    sd ra, 184(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 176(sp) # 8-byte Folded Spill
-; RV64-NEXT:    .cfi_offset ra, -8
-; RV64-NEXT:    .cfi_offset s0, -16
-; RV64-NEXT:    addi s0, sp, 192
-; RV64-NEXT:    .cfi_def_cfa s0, 0
-; RV64-NEXT:    andi sp, sp, -64
-; RV64-NEXT:    bnez a0, .LBB20_3
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v12
-; RV64-NEXT:    fsd ft0, 64(sp)
-; RV64-NEXT:    beqz a0, .LBB20_4
-; RV64-NEXT:  .LBB20_2:
-; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, mu
-; RV64-NEXT:    vslidedown.vi v28, v8, 7
-; RV64-NEXT:    j .LBB20_5
-; RV64-NEXT:  .LBB20_3:
-; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    fsd ft0, 64(sp)
-; RV64-NEXT:    bnez a0, .LBB20_2
-; RV64-NEXT:  .LBB20_4:
-; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, mu
-; RV64-NEXT:    vslidedown.vi v28, v12, 7
-; RV64-NEXT:  .LBB20_5:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsd ft0, 120(sp)
-; RV64-NEXT:    bnez a0, .LBB20_7
-; RV64-NEXT:  # %bb.6:
-; RV64-NEXT:    vslidedown.vi v28, v12, 6
-; RV64-NEXT:    j .LBB20_8
-; RV64-NEXT:  .LBB20_7:
-; RV64-NEXT:    vslidedown.vi v28, v8, 6
-; RV64-NEXT:  .LBB20_8:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsd ft0, 112(sp)
-; RV64-NEXT:    bnez a0, .LBB20_10
-; RV64-NEXT:  # %bb.9:
-; RV64-NEXT:    vslidedown.vi v28, v12, 5
-; RV64-NEXT:    j .LBB20_11
-; RV64-NEXT:  .LBB20_10:
-; RV64-NEXT:    vslidedown.vi v28, v8, 5
-; RV64-NEXT:  .LBB20_11:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsd ft0, 104(sp)
-; RV64-NEXT:    bnez a0, .LBB20_13
-; RV64-NEXT:  # %bb.12:
-; RV64-NEXT:    vslidedown.vi v28, v12, 4
-; RV64-NEXT:    j .LBB20_14
-; RV64-NEXT:  .LBB20_13:
-; RV64-NEXT:    vslidedown.vi v28, v8, 4
-; RV64-NEXT:  .LBB20_14:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsd ft0, 96(sp)
-; RV64-NEXT:    bnez a0, .LBB20_16
-; RV64-NEXT:  # %bb.15:
-; RV64-NEXT:    vslidedown.vi v28, v12, 3
-; RV64-NEXT:    j .LBB20_17
-; RV64-NEXT:  .LBB20_16:
-; RV64-NEXT:    vslidedown.vi v28, v8, 3
-; RV64-NEXT:  .LBB20_17:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsd ft0, 88(sp)
-; RV64-NEXT:    bnez a0, .LBB20_19
-; RV64-NEXT:  # %bb.18:
-; RV64-NEXT:    vslidedown.vi v28, v12, 2
-; RV64-NEXT:    j .LBB20_20
-; RV64-NEXT:  .LBB20_19:
-; RV64-NEXT:    vslidedown.vi v28, v8, 2
-; RV64-NEXT:  .LBB20_20:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsd ft0, 80(sp)
-; RV64-NEXT:    bnez a0, .LBB20_22
-; RV64-NEXT:  # %bb.21:
-; RV64-NEXT:    vslidedown.vi v28, v12, 1
-; RV64-NEXT:    j .LBB20_23
-; RV64-NEXT:  .LBB20_22:
-; RV64-NEXT:    vslidedown.vi v28, v8, 1
-; RV64-NEXT:  .LBB20_23:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsd ft0, 72(sp)
-; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT:    addi a0, sp, 64
-; RV64-NEXT:    vle64.v v8, (a0)
-; RV64-NEXT:    addi sp, s0, -192
-; RV64-NEXT:    ld s0, 176(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld ra, 184(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 192
-; RV64-NEXT:    ret
+; CHECK-LABEL: select_v8f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT:    ret
   %v = select i1 %c, <8 x double> %a, <8 x double> %b
   ret <8 x double> %v
 }
 
 define <8 x double> @selectcc_v8f64(double %a, double %b, <8 x double> %c, <8 x double> %d) {
-; RV32-LABEL: selectcc_v8f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -192
-; RV32-NEXT:    .cfi_def_cfa_offset 192
-; RV32-NEXT:    sw ra, 188(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 184(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    .cfi_offset s0, -8
-; RV32-NEXT:    addi s0, sp, 192
-; RV32-NEXT:    .cfi_def_cfa s0, 0
-; RV32-NEXT:    andi sp, sp, -64
-; RV32-NEXT:    feq.d a0, fa0, fa1
-; RV32-NEXT:    bnez a0, .LBB21_3
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v12
-; RV32-NEXT:    fsd ft0, 64(sp)
-; RV32-NEXT:    beqz a0, .LBB21_4
-; RV32-NEXT:  .LBB21_2:
-; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, mu
-; RV32-NEXT:    vslidedown.vi v28, v8, 7
-; RV32-NEXT:    j .LBB21_5
-; RV32-NEXT:  .LBB21_3:
-; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    fsd ft0, 64(sp)
-; RV32-NEXT:    bnez a0, .LBB21_2
-; RV32-NEXT:  .LBB21_4:
-; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, mu
-; RV32-NEXT:    vslidedown.vi v28, v12, 7
-; RV32-NEXT:  .LBB21_5:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsd ft0, 120(sp)
-; RV32-NEXT:    bnez a0, .LBB21_7
-; RV32-NEXT:  # %bb.6:
-; RV32-NEXT:    vslidedown.vi v28, v12, 6
-; RV32-NEXT:    j .LBB21_8
-; RV32-NEXT:  .LBB21_7:
-; RV32-NEXT:    vslidedown.vi v28, v8, 6
-; RV32-NEXT:  .LBB21_8:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsd ft0, 112(sp)
-; RV32-NEXT:    bnez a0, .LBB21_10
-; RV32-NEXT:  # %bb.9:
-; RV32-NEXT:    vslidedown.vi v28, v12, 5
-; RV32-NEXT:    j .LBB21_11
-; RV32-NEXT:  .LBB21_10:
-; RV32-NEXT:    vslidedown.vi v28, v8, 5
-; RV32-NEXT:  .LBB21_11:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsd ft0, 104(sp)
-; RV32-NEXT:    bnez a0, .LBB21_13
-; RV32-NEXT:  # %bb.12:
-; RV32-NEXT:    vslidedown.vi v28, v12, 4
-; RV32-NEXT:    j .LBB21_14
-; RV32-NEXT:  .LBB21_13:
-; RV32-NEXT:    vslidedown.vi v28, v8, 4
-; RV32-NEXT:  .LBB21_14:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsd ft0, 96(sp)
-; RV32-NEXT:    bnez a0, .LBB21_16
-; RV32-NEXT:  # %bb.15:
-; RV32-NEXT:    vslidedown.vi v28, v12, 3
-; RV32-NEXT:    j .LBB21_17
-; RV32-NEXT:  .LBB21_16:
-; RV32-NEXT:    vslidedown.vi v28, v8, 3
-; RV32-NEXT:  .LBB21_17:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsd ft0, 88(sp)
-; RV32-NEXT:    bnez a0, .LBB21_19
-; RV32-NEXT:  # %bb.18:
-; RV32-NEXT:    vslidedown.vi v28, v12, 2
-; RV32-NEXT:    j .LBB21_20
-; RV32-NEXT:  .LBB21_19:
-; RV32-NEXT:    vslidedown.vi v28, v8, 2
-; RV32-NEXT:  .LBB21_20:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsd ft0, 80(sp)
-; RV32-NEXT:    bnez a0, .LBB21_22
-; RV32-NEXT:  # %bb.21:
-; RV32-NEXT:    vslidedown.vi v28, v12, 1
-; RV32-NEXT:    j .LBB21_23
-; RV32-NEXT:  .LBB21_22:
-; RV32-NEXT:    vslidedown.vi v28, v8, 1
-; RV32-NEXT:  .LBB21_23:
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    fsd ft0, 72(sp)
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; RV32-NEXT:    addi a0, sp, 64
-; RV32-NEXT:    vle64.v v8, (a0)
-; RV32-NEXT:    addi sp, s0, -192
-; RV32-NEXT:    lw s0, 184(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw ra, 188(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 192
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: selectcc_v8f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -192
-; RV64-NEXT:    .cfi_def_cfa_offset 192
-; RV64-NEXT:    sd ra, 184(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 176(sp) # 8-byte Folded Spill
-; RV64-NEXT:    .cfi_offset ra, -8
-; RV64-NEXT:    .cfi_offset s0, -16
-; RV64-NEXT:    addi s0, sp, 192
-; RV64-NEXT:    .cfi_def_cfa s0, 0
-; RV64-NEXT:    andi sp, sp, -64
-; RV64-NEXT:    feq.d a0, fa0, fa1
-; RV64-NEXT:    bnez a0, .LBB21_3
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v12
-; RV64-NEXT:    fsd ft0, 64(sp)
-; RV64-NEXT:    beqz a0, .LBB21_4
-; RV64-NEXT:  .LBB21_2:
-; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, mu
-; RV64-NEXT:    vslidedown.vi v28, v8, 7
-; RV64-NEXT:    j .LBB21_5
-; RV64-NEXT:  .LBB21_3:
-; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    fsd ft0, 64(sp)
-; RV64-NEXT:    bnez a0, .LBB21_2
-; RV64-NEXT:  .LBB21_4:
-; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, mu
-; RV64-NEXT:    vslidedown.vi v28, v12, 7
-; RV64-NEXT:  .LBB21_5:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsd ft0, 120(sp)
-; RV64-NEXT:    bnez a0, .LBB21_7
-; RV64-NEXT:  # %bb.6:
-; RV64-NEXT:    vslidedown.vi v28, v12, 6
-; RV64-NEXT:    j .LBB21_8
-; RV64-NEXT:  .LBB21_7:
-; RV64-NEXT:    vslidedown.vi v28, v8, 6
-; RV64-NEXT:  .LBB21_8:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsd ft0, 112(sp)
-; RV64-NEXT:    bnez a0, .LBB21_10
-; RV64-NEXT:  # %bb.9:
-; RV64-NEXT:    vslidedown.vi v28, v12, 5
-; RV64-NEXT:    j .LBB21_11
-; RV64-NEXT:  .LBB21_10:
-; RV64-NEXT:    vslidedown.vi v28, v8, 5
-; RV64-NEXT:  .LBB21_11:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsd ft0, 104(sp)
-; RV64-NEXT:    bnez a0, .LBB21_13
-; RV64-NEXT:  # %bb.12:
-; RV64-NEXT:    vslidedown.vi v28, v12, 4
-; RV64-NEXT:    j .LBB21_14
-; RV64-NEXT:  .LBB21_13:
-; RV64-NEXT:    vslidedown.vi v28, v8, 4
-; RV64-NEXT:  .LBB21_14:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsd ft0, 96(sp)
-; RV64-NEXT:    bnez a0, .LBB21_16
-; RV64-NEXT:  # %bb.15:
-; RV64-NEXT:    vslidedown.vi v28, v12, 3
-; RV64-NEXT:    j .LBB21_17
-; RV64-NEXT:  .LBB21_16:
-; RV64-NEXT:    vslidedown.vi v28, v8, 3
-; RV64-NEXT:  .LBB21_17:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsd ft0, 88(sp)
-; RV64-NEXT:    bnez a0, .LBB21_19
-; RV64-NEXT:  # %bb.18:
-; RV64-NEXT:    vslidedown.vi v28, v12, 2
-; RV64-NEXT:    j .LBB21_20
-; RV64-NEXT:  .LBB21_19:
-; RV64-NEXT:    vslidedown.vi v28, v8, 2
-; RV64-NEXT:  .LBB21_20:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsd ft0, 80(sp)
-; RV64-NEXT:    bnez a0, .LBB21_22
-; RV64-NEXT:  # %bb.21:
-; RV64-NEXT:    vslidedown.vi v28, v12, 1
-; RV64-NEXT:    j .LBB21_23
-; RV64-NEXT:  .LBB21_22:
-; RV64-NEXT:    vslidedown.vi v28, v8, 1
-; RV64-NEXT:  .LBB21_23:
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    fsd ft0, 72(sp)
-; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT:    addi a0, sp, 64
-; RV64-NEXT:    vle64.v v8, (a0)
-; RV64-NEXT:    addi sp, s0, -192
-; RV64-NEXT:    ld s0, 176(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld ra, 184(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 192
-; RV64-NEXT:    ret
+; CHECK-LABEL: selectcc_v8f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    feq.d a0, fa0, fa1
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT:    ret
   %cmp = fcmp oeq double %a, %b
   %v = select i1 %cmp, <8 x double> %c, <8 x double> %d
   ret <8 x double> %v
 }
 
 define <16 x double> @select_v16f64(i1 zeroext %c, <16 x double> %a, <16 x double> %b) {
-; RV32-LABEL: select_v16f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -384
-; RV32-NEXT:    .cfi_def_cfa_offset 384
-; RV32-NEXT:    sw ra, 380(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 376(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    .cfi_offset s0, -8
-; RV32-NEXT:    addi s0, sp, 384
-; RV32-NEXT:    .cfi_def_cfa s0, 0
-; RV32-NEXT:    andi sp, sp, -128
-; RV32-NEXT:    bnez a0, .LBB22_3
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v16
-; RV32-NEXT:    fsd ft0, 128(sp)
-; RV32-NEXT:    beqz a0, .LBB22_4
-; RV32-NEXT:  .LBB22_2:
-; RV32-NEXT:    vsetivli zero, 1, e64, m8, ta, mu
-; RV32-NEXT:    vslidedown.vi v24, v8, 15
-; RV32-NEXT:    j .LBB22_5
-; RV32-NEXT:  .LBB22_3:
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    fsd ft0, 128(sp)
-; RV32-NEXT:    bnez a0, .LBB22_2
-; RV32-NEXT:  .LBB22_4:
-; RV32-NEXT:    vsetivli zero, 1, e64, m8, ta, mu
-; RV32-NEXT:    vslidedown.vi v24, v16, 15
-; RV32-NEXT:  .LBB22_5:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 248(sp)
-; RV32-NEXT:    bnez a0, .LBB22_7
-; RV32-NEXT:  # %bb.6:
-; RV32-NEXT:    vslidedown.vi v24, v16, 14
-; RV32-NEXT:    j .LBB22_8
-; RV32-NEXT:  .LBB22_7:
-; RV32-NEXT:    vslidedown.vi v24, v8, 14
-; RV32-NEXT:  .LBB22_8:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 240(sp)
-; RV32-NEXT:    bnez a0, .LBB22_10
-; RV32-NEXT:  # %bb.9:
-; RV32-NEXT:    vslidedown.vi v24, v16, 13
-; RV32-NEXT:    j .LBB22_11
-; RV32-NEXT:  .LBB22_10:
-; RV32-NEXT:    vslidedown.vi v24, v8, 13
-; RV32-NEXT:  .LBB22_11:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 232(sp)
-; RV32-NEXT:    bnez a0, .LBB22_13
-; RV32-NEXT:  # %bb.12:
-; RV32-NEXT:    vslidedown.vi v24, v16, 12
-; RV32-NEXT:    j .LBB22_14
-; RV32-NEXT:  .LBB22_13:
-; RV32-NEXT:    vslidedown.vi v24, v8, 12
-; RV32-NEXT:  .LBB22_14:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 224(sp)
-; RV32-NEXT:    bnez a0, .LBB22_16
-; RV32-NEXT:  # %bb.15:
-; RV32-NEXT:    vslidedown.vi v24, v16, 11
-; RV32-NEXT:    j .LBB22_17
-; RV32-NEXT:  .LBB22_16:
-; RV32-NEXT:    vslidedown.vi v24, v8, 11
-; RV32-NEXT:  .LBB22_17:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 216(sp)
-; RV32-NEXT:    bnez a0, .LBB22_19
-; RV32-NEXT:  # %bb.18:
-; RV32-NEXT:    vslidedown.vi v24, v16, 10
-; RV32-NEXT:    j .LBB22_20
-; RV32-NEXT:  .LBB22_19:
-; RV32-NEXT:    vslidedown.vi v24, v8, 10
-; RV32-NEXT:  .LBB22_20:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 208(sp)
-; RV32-NEXT:    bnez a0, .LBB22_22
-; RV32-NEXT:  # %bb.21:
-; RV32-NEXT:    vslidedown.vi v24, v16, 9
-; RV32-NEXT:    j .LBB22_23
-; RV32-NEXT:  .LBB22_22:
-; RV32-NEXT:    vslidedown.vi v24, v8, 9
-; RV32-NEXT:  .LBB22_23:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 200(sp)
-; RV32-NEXT:    bnez a0, .LBB22_25
-; RV32-NEXT:  # %bb.24:
-; RV32-NEXT:    vslidedown.vi v24, v16, 8
-; RV32-NEXT:    j .LBB22_26
-; RV32-NEXT:  .LBB22_25:
-; RV32-NEXT:    vslidedown.vi v24, v8, 8
-; RV32-NEXT:  .LBB22_26:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 192(sp)
-; RV32-NEXT:    bnez a0, .LBB22_28
-; RV32-NEXT:  # %bb.27:
-; RV32-NEXT:    vslidedown.vi v24, v16, 7
-; RV32-NEXT:    j .LBB22_29
-; RV32-NEXT:  .LBB22_28:
-; RV32-NEXT:    vslidedown.vi v24, v8, 7
-; RV32-NEXT:  .LBB22_29:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 184(sp)
-; RV32-NEXT:    bnez a0, .LBB22_31
-; RV32-NEXT:  # %bb.30:
-; RV32-NEXT:    vslidedown.vi v24, v16, 6
-; RV32-NEXT:    j .LBB22_32
-; RV32-NEXT:  .LBB22_31:
-; RV32-NEXT:    vslidedown.vi v24, v8, 6
-; RV32-NEXT:  .LBB22_32:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 176(sp)
-; RV32-NEXT:    bnez a0, .LBB22_34
-; RV32-NEXT:  # %bb.33:
-; RV32-NEXT:    vslidedown.vi v24, v16, 5
-; RV32-NEXT:    j .LBB22_35
-; RV32-NEXT:  .LBB22_34:
-; RV32-NEXT:    vslidedown.vi v24, v8, 5
-; RV32-NEXT:  .LBB22_35:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 168(sp)
-; RV32-NEXT:    bnez a0, .LBB22_37
-; RV32-NEXT:  # %bb.36:
-; RV32-NEXT:    vslidedown.vi v24, v16, 4
-; RV32-NEXT:    j .LBB22_38
-; RV32-NEXT:  .LBB22_37:
-; RV32-NEXT:    vslidedown.vi v24, v8, 4
-; RV32-NEXT:  .LBB22_38:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 160(sp)
-; RV32-NEXT:    bnez a0, .LBB22_40
-; RV32-NEXT:  # %bb.39:
-; RV32-NEXT:    vslidedown.vi v24, v16, 3
-; RV32-NEXT:    j .LBB22_41
-; RV32-NEXT:  .LBB22_40:
-; RV32-NEXT:    vslidedown.vi v24, v8, 3
-; RV32-NEXT:  .LBB22_41:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 152(sp)
-; RV32-NEXT:    bnez a0, .LBB22_43
-; RV32-NEXT:  # %bb.42:
-; RV32-NEXT:    vslidedown.vi v24, v16, 2
-; RV32-NEXT:    j .LBB22_44
-; RV32-NEXT:  .LBB22_43:
-; RV32-NEXT:    vslidedown.vi v24, v8, 2
-; RV32-NEXT:  .LBB22_44:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 144(sp)
-; RV32-NEXT:    bnez a0, .LBB22_46
-; RV32-NEXT:  # %bb.45:
-; RV32-NEXT:    vslidedown.vi v8, v16, 1
-; RV32-NEXT:    j .LBB22_47
-; RV32-NEXT:  .LBB22_46:
-; RV32-NEXT:    vslidedown.vi v8, v8, 1
-; RV32-NEXT:  .LBB22_47:
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    fsd ft0, 136(sp)
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
-; RV32-NEXT:    addi a0, sp, 128
-; RV32-NEXT:    vle64.v v8, (a0)
-; RV32-NEXT:    addi sp, s0, -384
-; RV32-NEXT:    lw s0, 376(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw ra, 380(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 384
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: select_v16f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -384
-; RV64-NEXT:    .cfi_def_cfa_offset 384
-; RV64-NEXT:    sd ra, 376(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 368(sp) # 8-byte Folded Spill
-; RV64-NEXT:    .cfi_offset ra, -8
-; RV64-NEXT:    .cfi_offset s0, -16
-; RV64-NEXT:    addi s0, sp, 384
-; RV64-NEXT:    .cfi_def_cfa s0, 0
-; RV64-NEXT:    andi sp, sp, -128
-; RV64-NEXT:    bnez a0, .LBB22_3
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v16
-; RV64-NEXT:    fsd ft0, 128(sp)
-; RV64-NEXT:    beqz a0, .LBB22_4
-; RV64-NEXT:  .LBB22_2:
-; RV64-NEXT:    vsetivli zero, 1, e64, m8, ta, mu
-; RV64-NEXT:    vslidedown.vi v24, v8, 15
-; RV64-NEXT:    j .LBB22_5
-; RV64-NEXT:  .LBB22_3:
-; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    fsd ft0, 128(sp)
-; RV64-NEXT:    bnez a0, .LBB22_2
-; RV64-NEXT:  .LBB22_4:
-; RV64-NEXT:    vsetivli zero, 1, e64, m8, ta, mu
-; RV64-NEXT:    vslidedown.vi v24, v16, 15
-; RV64-NEXT:  .LBB22_5:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 248(sp)
-; RV64-NEXT:    bnez a0, .LBB22_7
-; RV64-NEXT:  # %bb.6:
-; RV64-NEXT:    vslidedown.vi v24, v16, 14
-; RV64-NEXT:    j .LBB22_8
-; RV64-NEXT:  .LBB22_7:
-; RV64-NEXT:    vslidedown.vi v24, v8, 14
-; RV64-NEXT:  .LBB22_8:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 240(sp)
-; RV64-NEXT:    bnez a0, .LBB22_10
-; RV64-NEXT:  # %bb.9:
-; RV64-NEXT:    vslidedown.vi v24, v16, 13
-; RV64-NEXT:    j .LBB22_11
-; RV64-NEXT:  .LBB22_10:
-; RV64-NEXT:    vslidedown.vi v24, v8, 13
-; RV64-NEXT:  .LBB22_11:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 232(sp)
-; RV64-NEXT:    bnez a0, .LBB22_13
-; RV64-NEXT:  # %bb.12:
-; RV64-NEXT:    vslidedown.vi v24, v16, 12
-; RV64-NEXT:    j .LBB22_14
-; RV64-NEXT:  .LBB22_13:
-; RV64-NEXT:    vslidedown.vi v24, v8, 12
-; RV64-NEXT:  .LBB22_14:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 224(sp)
-; RV64-NEXT:    bnez a0, .LBB22_16
-; RV64-NEXT:  # %bb.15:
-; RV64-NEXT:    vslidedown.vi v24, v16, 11
-; RV64-NEXT:    j .LBB22_17
-; RV64-NEXT:  .LBB22_16:
-; RV64-NEXT:    vslidedown.vi v24, v8, 11
-; RV64-NEXT:  .LBB22_17:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 216(sp)
-; RV64-NEXT:    bnez a0, .LBB22_19
-; RV64-NEXT:  # %bb.18:
-; RV64-NEXT:    vslidedown.vi v24, v16, 10
-; RV64-NEXT:    j .LBB22_20
-; RV64-NEXT:  .LBB22_19:
-; RV64-NEXT:    vslidedown.vi v24, v8, 10
-; RV64-NEXT:  .LBB22_20:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 208(sp)
-; RV64-NEXT:    bnez a0, .LBB22_22
-; RV64-NEXT:  # %bb.21:
-; RV64-NEXT:    vslidedown.vi v24, v16, 9
-; RV64-NEXT:    j .LBB22_23
-; RV64-NEXT:  .LBB22_22:
-; RV64-NEXT:    vslidedown.vi v24, v8, 9
-; RV64-NEXT:  .LBB22_23:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 200(sp)
-; RV64-NEXT:    bnez a0, .LBB22_25
-; RV64-NEXT:  # %bb.24:
-; RV64-NEXT:    vslidedown.vi v24, v16, 8
-; RV64-NEXT:    j .LBB22_26
-; RV64-NEXT:  .LBB22_25:
-; RV64-NEXT:    vslidedown.vi v24, v8, 8
-; RV64-NEXT:  .LBB22_26:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 192(sp)
-; RV64-NEXT:    bnez a0, .LBB22_28
-; RV64-NEXT:  # %bb.27:
-; RV64-NEXT:    vslidedown.vi v24, v16, 7
-; RV64-NEXT:    j .LBB22_29
-; RV64-NEXT:  .LBB22_28:
-; RV64-NEXT:    vslidedown.vi v24, v8, 7
-; RV64-NEXT:  .LBB22_29:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 184(sp)
-; RV64-NEXT:    bnez a0, .LBB22_31
-; RV64-NEXT:  # %bb.30:
-; RV64-NEXT:    vslidedown.vi v24, v16, 6
-; RV64-NEXT:    j .LBB22_32
-; RV64-NEXT:  .LBB22_31:
-; RV64-NEXT:    vslidedown.vi v24, v8, 6
-; RV64-NEXT:  .LBB22_32:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 176(sp)
-; RV64-NEXT:    bnez a0, .LBB22_34
-; RV64-NEXT:  # %bb.33:
-; RV64-NEXT:    vslidedown.vi v24, v16, 5
-; RV64-NEXT:    j .LBB22_35
-; RV64-NEXT:  .LBB22_34:
-; RV64-NEXT:    vslidedown.vi v24, v8, 5
-; RV64-NEXT:  .LBB22_35:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 168(sp)
-; RV64-NEXT:    bnez a0, .LBB22_37
-; RV64-NEXT:  # %bb.36:
-; RV64-NEXT:    vslidedown.vi v24, v16, 4
-; RV64-NEXT:    j .LBB22_38
-; RV64-NEXT:  .LBB22_37:
-; RV64-NEXT:    vslidedown.vi v24, v8, 4
-; RV64-NEXT:  .LBB22_38:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 160(sp)
-; RV64-NEXT:    bnez a0, .LBB22_40
-; RV64-NEXT:  # %bb.39:
-; RV64-NEXT:    vslidedown.vi v24, v16, 3
-; RV64-NEXT:    j .LBB22_41
-; RV64-NEXT:  .LBB22_40:
-; RV64-NEXT:    vslidedown.vi v24, v8, 3
-; RV64-NEXT:  .LBB22_41:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 152(sp)
-; RV64-NEXT:    bnez a0, .LBB22_43
-; RV64-NEXT:  # %bb.42:
-; RV64-NEXT:    vslidedown.vi v24, v16, 2
-; RV64-NEXT:    j .LBB22_44
-; RV64-NEXT:  .LBB22_43:
-; RV64-NEXT:    vslidedown.vi v24, v8, 2
-; RV64-NEXT:  .LBB22_44:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 144(sp)
-; RV64-NEXT:    bnez a0, .LBB22_46
-; RV64-NEXT:  # %bb.45:
-; RV64-NEXT:    vslidedown.vi v8, v16, 1
-; RV64-NEXT:    j .LBB22_47
-; RV64-NEXT:  .LBB22_46:
-; RV64-NEXT:    vslidedown.vi v8, v8, 1
-; RV64-NEXT:  .LBB22_47:
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    fsd ft0, 136(sp)
-; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
-; RV64-NEXT:    addi a0, sp, 128
-; RV64-NEXT:    vle64.v v8, (a0)
-; RV64-NEXT:    addi sp, s0, -384
-; RV64-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 384
-; RV64-NEXT:    ret
+; CHECK-LABEL: select_v16f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    ret
   %v = select i1 %c, <16 x double> %a, <16 x double> %b
   ret <16 x double> %v
 }
 
 define <16 x double> @selectcc_v16f64(double %a, double %b, <16 x double> %c, <16 x double> %d) {
-; RV32-LABEL: selectcc_v16f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -384
-; RV32-NEXT:    .cfi_def_cfa_offset 384
-; RV32-NEXT:    sw ra, 380(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 376(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    .cfi_offset s0, -8
-; RV32-NEXT:    addi s0, sp, 384
-; RV32-NEXT:    .cfi_def_cfa s0, 0
-; RV32-NEXT:    andi sp, sp, -128
-; RV32-NEXT:    feq.d a0, fa0, fa1
-; RV32-NEXT:    bnez a0, .LBB23_3
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v16
-; RV32-NEXT:    fsd ft0, 128(sp)
-; RV32-NEXT:    beqz a0, .LBB23_4
-; RV32-NEXT:  .LBB23_2:
-; RV32-NEXT:    vsetivli zero, 1, e64, m8, ta, mu
-; RV32-NEXT:    vslidedown.vi v24, v8, 15
-; RV32-NEXT:    j .LBB23_5
-; RV32-NEXT:  .LBB23_3:
-; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    fsd ft0, 128(sp)
-; RV32-NEXT:    bnez a0, .LBB23_2
-; RV32-NEXT:  .LBB23_4:
-; RV32-NEXT:    vsetivli zero, 1, e64, m8, ta, mu
-; RV32-NEXT:    vslidedown.vi v24, v16, 15
-; RV32-NEXT:  .LBB23_5:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 248(sp)
-; RV32-NEXT:    bnez a0, .LBB23_7
-; RV32-NEXT:  # %bb.6:
-; RV32-NEXT:    vslidedown.vi v24, v16, 14
-; RV32-NEXT:    j .LBB23_8
-; RV32-NEXT:  .LBB23_7:
-; RV32-NEXT:    vslidedown.vi v24, v8, 14
-; RV32-NEXT:  .LBB23_8:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 240(sp)
-; RV32-NEXT:    bnez a0, .LBB23_10
-; RV32-NEXT:  # %bb.9:
-; RV32-NEXT:    vslidedown.vi v24, v16, 13
-; RV32-NEXT:    j .LBB23_11
-; RV32-NEXT:  .LBB23_10:
-; RV32-NEXT:    vslidedown.vi v24, v8, 13
-; RV32-NEXT:  .LBB23_11:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 232(sp)
-; RV32-NEXT:    bnez a0, .LBB23_13
-; RV32-NEXT:  # %bb.12:
-; RV32-NEXT:    vslidedown.vi v24, v16, 12
-; RV32-NEXT:    j .LBB23_14
-; RV32-NEXT:  .LBB23_13:
-; RV32-NEXT:    vslidedown.vi v24, v8, 12
-; RV32-NEXT:  .LBB23_14:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 224(sp)
-; RV32-NEXT:    bnez a0, .LBB23_16
-; RV32-NEXT:  # %bb.15:
-; RV32-NEXT:    vslidedown.vi v24, v16, 11
-; RV32-NEXT:    j .LBB23_17
-; RV32-NEXT:  .LBB23_16:
-; RV32-NEXT:    vslidedown.vi v24, v8, 11
-; RV32-NEXT:  .LBB23_17:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 216(sp)
-; RV32-NEXT:    bnez a0, .LBB23_19
-; RV32-NEXT:  # %bb.18:
-; RV32-NEXT:    vslidedown.vi v24, v16, 10
-; RV32-NEXT:    j .LBB23_20
-; RV32-NEXT:  .LBB23_19:
-; RV32-NEXT:    vslidedown.vi v24, v8, 10
-; RV32-NEXT:  .LBB23_20:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 208(sp)
-; RV32-NEXT:    bnez a0, .LBB23_22
-; RV32-NEXT:  # %bb.21:
-; RV32-NEXT:    vslidedown.vi v24, v16, 9
-; RV32-NEXT:    j .LBB23_23
-; RV32-NEXT:  .LBB23_22:
-; RV32-NEXT:    vslidedown.vi v24, v8, 9
-; RV32-NEXT:  .LBB23_23:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 200(sp)
-; RV32-NEXT:    bnez a0, .LBB23_25
-; RV32-NEXT:  # %bb.24:
-; RV32-NEXT:    vslidedown.vi v24, v16, 8
-; RV32-NEXT:    j .LBB23_26
-; RV32-NEXT:  .LBB23_25:
-; RV32-NEXT:    vslidedown.vi v24, v8, 8
-; RV32-NEXT:  .LBB23_26:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 192(sp)
-; RV32-NEXT:    bnez a0, .LBB23_28
-; RV32-NEXT:  # %bb.27:
-; RV32-NEXT:    vslidedown.vi v24, v16, 7
-; RV32-NEXT:    j .LBB23_29
-; RV32-NEXT:  .LBB23_28:
-; RV32-NEXT:    vslidedown.vi v24, v8, 7
-; RV32-NEXT:  .LBB23_29:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 184(sp)
-; RV32-NEXT:    bnez a0, .LBB23_31
-; RV32-NEXT:  # %bb.30:
-; RV32-NEXT:    vslidedown.vi v24, v16, 6
-; RV32-NEXT:    j .LBB23_32
-; RV32-NEXT:  .LBB23_31:
-; RV32-NEXT:    vslidedown.vi v24, v8, 6
-; RV32-NEXT:  .LBB23_32:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 176(sp)
-; RV32-NEXT:    bnez a0, .LBB23_34
-; RV32-NEXT:  # %bb.33:
-; RV32-NEXT:    vslidedown.vi v24, v16, 5
-; RV32-NEXT:    j .LBB23_35
-; RV32-NEXT:  .LBB23_34:
-; RV32-NEXT:    vslidedown.vi v24, v8, 5
-; RV32-NEXT:  .LBB23_35:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 168(sp)
-; RV32-NEXT:    bnez a0, .LBB23_37
-; RV32-NEXT:  # %bb.36:
-; RV32-NEXT:    vslidedown.vi v24, v16, 4
-; RV32-NEXT:    j .LBB23_38
-; RV32-NEXT:  .LBB23_37:
-; RV32-NEXT:    vslidedown.vi v24, v8, 4
-; RV32-NEXT:  .LBB23_38:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 160(sp)
-; RV32-NEXT:    bnez a0, .LBB23_40
-; RV32-NEXT:  # %bb.39:
-; RV32-NEXT:    vslidedown.vi v24, v16, 3
-; RV32-NEXT:    j .LBB23_41
-; RV32-NEXT:  .LBB23_40:
-; RV32-NEXT:    vslidedown.vi v24, v8, 3
-; RV32-NEXT:  .LBB23_41:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 152(sp)
-; RV32-NEXT:    bnez a0, .LBB23_43
-; RV32-NEXT:  # %bb.42:
-; RV32-NEXT:    vslidedown.vi v24, v16, 2
-; RV32-NEXT:    j .LBB23_44
-; RV32-NEXT:  .LBB23_43:
-; RV32-NEXT:    vslidedown.vi v24, v8, 2
-; RV32-NEXT:  .LBB23_44:
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    fsd ft0, 144(sp)
-; RV32-NEXT:    bnez a0, .LBB23_46
-; RV32-NEXT:  # %bb.45:
-; RV32-NEXT:    vslidedown.vi v8, v16, 1
-; RV32-NEXT:    j .LBB23_47
-; RV32-NEXT:  .LBB23_46:
-; RV32-NEXT:    vslidedown.vi v8, v8, 1
-; RV32-NEXT:  .LBB23_47:
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    fsd ft0, 136(sp)
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
-; RV32-NEXT:    addi a0, sp, 128
-; RV32-NEXT:    vle64.v v8, (a0)
-; RV32-NEXT:    addi sp, s0, -384
-; RV32-NEXT:    lw s0, 376(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw ra, 380(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 384
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: selectcc_v16f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -384
-; RV64-NEXT:    .cfi_def_cfa_offset 384
-; RV64-NEXT:    sd ra, 376(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 368(sp) # 8-byte Folded Spill
-; RV64-NEXT:    .cfi_offset ra, -8
-; RV64-NEXT:    .cfi_offset s0, -16
-; RV64-NEXT:    addi s0, sp, 384
-; RV64-NEXT:    .cfi_def_cfa s0, 0
-; RV64-NEXT:    andi sp, sp, -128
-; RV64-NEXT:    feq.d a0, fa0, fa1
-; RV64-NEXT:    bnez a0, .LBB23_3
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v16
-; RV64-NEXT:    fsd ft0, 128(sp)
-; RV64-NEXT:    beqz a0, .LBB23_4
-; RV64-NEXT:  .LBB23_2:
-; RV64-NEXT:    vsetivli zero, 1, e64, m8, ta, mu
-; RV64-NEXT:    vslidedown.vi v24, v8, 15
-; RV64-NEXT:    j .LBB23_5
-; RV64-NEXT:  .LBB23_3:
-; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    fsd ft0, 128(sp)
-; RV64-NEXT:    bnez a0, .LBB23_2
-; RV64-NEXT:  .LBB23_4:
-; RV64-NEXT:    vsetivli zero, 1, e64, m8, ta, mu
-; RV64-NEXT:    vslidedown.vi v24, v16, 15
-; RV64-NEXT:  .LBB23_5:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 248(sp)
-; RV64-NEXT:    bnez a0, .LBB23_7
-; RV64-NEXT:  # %bb.6:
-; RV64-NEXT:    vslidedown.vi v24, v16, 14
-; RV64-NEXT:    j .LBB23_8
-; RV64-NEXT:  .LBB23_7:
-; RV64-NEXT:    vslidedown.vi v24, v8, 14
-; RV64-NEXT:  .LBB23_8:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 240(sp)
-; RV64-NEXT:    bnez a0, .LBB23_10
-; RV64-NEXT:  # %bb.9:
-; RV64-NEXT:    vslidedown.vi v24, v16, 13
-; RV64-NEXT:    j .LBB23_11
-; RV64-NEXT:  .LBB23_10:
-; RV64-NEXT:    vslidedown.vi v24, v8, 13
-; RV64-NEXT:  .LBB23_11:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 232(sp)
-; RV64-NEXT:    bnez a0, .LBB23_13
-; RV64-NEXT:  # %bb.12:
-; RV64-NEXT:    vslidedown.vi v24, v16, 12
-; RV64-NEXT:    j .LBB23_14
-; RV64-NEXT:  .LBB23_13:
-; RV64-NEXT:    vslidedown.vi v24, v8, 12
-; RV64-NEXT:  .LBB23_14:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 224(sp)
-; RV64-NEXT:    bnez a0, .LBB23_16
-; RV64-NEXT:  # %bb.15:
-; RV64-NEXT:    vslidedown.vi v24, v16, 11
-; RV64-NEXT:    j .LBB23_17
-; RV64-NEXT:  .LBB23_16:
-; RV64-NEXT:    vslidedown.vi v24, v8, 11
-; RV64-NEXT:  .LBB23_17:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 216(sp)
-; RV64-NEXT:    bnez a0, .LBB23_19
-; RV64-NEXT:  # %bb.18:
-; RV64-NEXT:    vslidedown.vi v24, v16, 10
-; RV64-NEXT:    j .LBB23_20
-; RV64-NEXT:  .LBB23_19:
-; RV64-NEXT:    vslidedown.vi v24, v8, 10
-; RV64-NEXT:  .LBB23_20:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 208(sp)
-; RV64-NEXT:    bnez a0, .LBB23_22
-; RV64-NEXT:  # %bb.21:
-; RV64-NEXT:    vslidedown.vi v24, v16, 9
-; RV64-NEXT:    j .LBB23_23
-; RV64-NEXT:  .LBB23_22:
-; RV64-NEXT:    vslidedown.vi v24, v8, 9
-; RV64-NEXT:  .LBB23_23:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 200(sp)
-; RV64-NEXT:    bnez a0, .LBB23_25
-; RV64-NEXT:  # %bb.24:
-; RV64-NEXT:    vslidedown.vi v24, v16, 8
-; RV64-NEXT:    j .LBB23_26
-; RV64-NEXT:  .LBB23_25:
-; RV64-NEXT:    vslidedown.vi v24, v8, 8
-; RV64-NEXT:  .LBB23_26:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 192(sp)
-; RV64-NEXT:    bnez a0, .LBB23_28
-; RV64-NEXT:  # %bb.27:
-; RV64-NEXT:    vslidedown.vi v24, v16, 7
-; RV64-NEXT:    j .LBB23_29
-; RV64-NEXT:  .LBB23_28:
-; RV64-NEXT:    vslidedown.vi v24, v8, 7
-; RV64-NEXT:  .LBB23_29:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 184(sp)
-; RV64-NEXT:    bnez a0, .LBB23_31
-; RV64-NEXT:  # %bb.30:
-; RV64-NEXT:    vslidedown.vi v24, v16, 6
-; RV64-NEXT:    j .LBB23_32
-; RV64-NEXT:  .LBB23_31:
-; RV64-NEXT:    vslidedown.vi v24, v8, 6
-; RV64-NEXT:  .LBB23_32:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 176(sp)
-; RV64-NEXT:    bnez a0, .LBB23_34
-; RV64-NEXT:  # %bb.33:
-; RV64-NEXT:    vslidedown.vi v24, v16, 5
-; RV64-NEXT:    j .LBB23_35
-; RV64-NEXT:  .LBB23_34:
-; RV64-NEXT:    vslidedown.vi v24, v8, 5
-; RV64-NEXT:  .LBB23_35:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 168(sp)
-; RV64-NEXT:    bnez a0, .LBB23_37
-; RV64-NEXT:  # %bb.36:
-; RV64-NEXT:    vslidedown.vi v24, v16, 4
-; RV64-NEXT:    j .LBB23_38
-; RV64-NEXT:  .LBB23_37:
-; RV64-NEXT:    vslidedown.vi v24, v8, 4
-; RV64-NEXT:  .LBB23_38:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 160(sp)
-; RV64-NEXT:    bnez a0, .LBB23_40
-; RV64-NEXT:  # %bb.39:
-; RV64-NEXT:    vslidedown.vi v24, v16, 3
-; RV64-NEXT:    j .LBB23_41
-; RV64-NEXT:  .LBB23_40:
-; RV64-NEXT:    vslidedown.vi v24, v8, 3
-; RV64-NEXT:  .LBB23_41:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 152(sp)
-; RV64-NEXT:    bnez a0, .LBB23_43
-; RV64-NEXT:  # %bb.42:
-; RV64-NEXT:    vslidedown.vi v24, v16, 2
-; RV64-NEXT:    j .LBB23_44
-; RV64-NEXT:  .LBB23_43:
-; RV64-NEXT:    vslidedown.vi v24, v8, 2
-; RV64-NEXT:  .LBB23_44:
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    fsd ft0, 144(sp)
-; RV64-NEXT:    bnez a0, .LBB23_46
-; RV64-NEXT:  # %bb.45:
-; RV64-NEXT:    vslidedown.vi v8, v16, 1
-; RV64-NEXT:    j .LBB23_47
-; RV64-NEXT:  .LBB23_46:
-; RV64-NEXT:    vslidedown.vi v8, v8, 1
-; RV64-NEXT:  .LBB23_47:
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    fsd ft0, 136(sp)
-; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
-; RV64-NEXT:    addi a0, sp, 128
-; RV64-NEXT:    vle64.v v8, (a0)
-; RV64-NEXT:    addi sp, s0, -384
-; RV64-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 384
-; RV64-NEXT:    ret
+; CHECK-LABEL: selectcc_v16f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    feq.d a0, fa0, fa1
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    ret
   %cmp = fcmp oeq double %a, %b
   %v = select i1 %cmp, <16 x double> %c, <16 x double> %d
   ret <16 x double> %v

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll
index 153c5239ad581..536091d1c1f63 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll
@@ -7,13 +7,8 @@
 define <1 x i1> @select_v1i1(i1 zeroext %c, <1 x i1> %a, <1 x i1> %b) {
 ; CHECK-LABEL: select_v1i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, 1
-; CHECK-NEXT:    bnez a0, .LBB0_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB0_2:
 ; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
-; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vmsne.vi v25, v25, 0
 ; CHECK-NEXT:    vmandnot.mm v26, v8, v25
 ; CHECK-NEXT:    vmand.mm v25, v0, v25
@@ -27,12 +22,7 @@ define <1 x i1> @selectcc_v1i1(i1 signext %a, i1 signext %b, <1 x i1> %c, <1 x i
 ; CHECK-LABEL: selectcc_v1i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xor a0, a0, a1
-; CHECK-NEXT:    andi a1, a0, 1
-; CHECK-NEXT:    addi a0, zero, 1
-; CHECK-NEXT:    bnez a1, .LBB1_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB1_2:
+; CHECK-NEXT:    andi a0, a0, 1
 ; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vmsne.vi v25, v25, 0
@@ -48,13 +38,8 @@ define <1 x i1> @selectcc_v1i1(i1 signext %a, i1 signext %b, <1 x i1> %c, <1 x i
 define <2 x i1> @select_v2i1(i1 zeroext %c, <2 x i1> %a, <2 x i1> %b) {
 ; CHECK-LABEL: select_v2i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, 1
-; CHECK-NEXT:    bnez a0, .LBB2_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB2_2:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
-; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vmsne.vi v25, v25, 0
 ; CHECK-NEXT:    vmandnot.mm v26, v8, v25
 ; CHECK-NEXT:    vmand.mm v25, v0, v25
@@ -68,12 +53,7 @@ define <2 x i1> @selectcc_v2i1(i1 signext %a, i1 signext %b, <2 x i1> %c, <2 x i
 ; CHECK-LABEL: selectcc_v2i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xor a0, a0, a1
-; CHECK-NEXT:    andi a1, a0, 1
-; CHECK-NEXT:    addi a0, zero, 1
-; CHECK-NEXT:    bnez a1, .LBB3_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB3_2:
+; CHECK-NEXT:    andi a0, a0, 1
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vmsne.vi v25, v25, 0
@@ -89,13 +69,8 @@ define <2 x i1> @selectcc_v2i1(i1 signext %a, i1 signext %b, <2 x i1> %c, <2 x i
 define <4 x i1> @select_v4i1(i1 zeroext %c, <4 x i1> %a, <4 x i1> %b) {
 ; CHECK-LABEL: select_v4i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, 1
-; CHECK-NEXT:    bnez a0, .LBB4_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB4_2:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vmsne.vi v25, v25, 0
 ; CHECK-NEXT:    vmandnot.mm v26, v8, v25
 ; CHECK-NEXT:    vmand.mm v25, v0, v25
@@ -109,12 +84,7 @@ define <4 x i1> @selectcc_v4i1(i1 signext %a, i1 signext %b, <4 x i1> %c, <4 x i
 ; CHECK-LABEL: selectcc_v4i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xor a0, a0, a1
-; CHECK-NEXT:    andi a1, a0, 1
-; CHECK-NEXT:    addi a0, zero, 1
-; CHECK-NEXT:    bnez a1, .LBB5_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB5_2:
+; CHECK-NEXT:    andi a0, a0, 1
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vmsne.vi v25, v25, 0
@@ -130,13 +100,8 @@ define <4 x i1> @selectcc_v4i1(i1 signext %a, i1 signext %b, <4 x i1> %c, <4 x i
 define <8 x i1> @select_v8i1(i1 zeroext %c, <8 x i1> %a, <8 x i1> %b) {
 ; CHECK-LABEL: select_v8i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, 1
-; CHECK-NEXT:    bnez a0, .LBB6_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB6_2:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vmsne.vi v25, v25, 0
 ; CHECK-NEXT:    vmandnot.mm v26, v8, v25
 ; CHECK-NEXT:    vmand.mm v25, v0, v25
@@ -150,12 +115,7 @@ define <8 x i1> @selectcc_v8i1(i1 signext %a, i1 signext %b, <8 x i1> %c, <8 x i
 ; CHECK-LABEL: selectcc_v8i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xor a0, a0, a1
-; CHECK-NEXT:    andi a1, a0, 1
-; CHECK-NEXT:    addi a0, zero, 1
-; CHECK-NEXT:    bnez a1, .LBB7_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB7_2:
+; CHECK-NEXT:    andi a0, a0, 1
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vmsne.vi v25, v25, 0
@@ -171,13 +131,8 @@ define <8 x i1> @selectcc_v8i1(i1 signext %a, i1 signext %b, <8 x i1> %c, <8 x i
 define <16 x i1> @select_v16i1(i1 zeroext %c, <16 x i1> %a, <16 x i1> %b) {
 ; CHECK-LABEL: select_v16i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, 1
-; CHECK-NEXT:    bnez a0, .LBB8_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB8_2:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vmsne.vi v25, v25, 0
 ; CHECK-NEXT:    vmandnot.mm v26, v8, v25
 ; CHECK-NEXT:    vmand.mm v25, v0, v25
@@ -191,12 +146,7 @@ define <16 x i1> @selectcc_v16i1(i1 signext %a, i1 signext %b, <16 x i1> %c, <16
 ; CHECK-LABEL: selectcc_v16i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xor a0, a0, a1
-; CHECK-NEXT:    andi a1, a0, 1
-; CHECK-NEXT:    addi a0, zero, 1
-; CHECK-NEXT:    bnez a1, .LBB9_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB9_2:
+; CHECK-NEXT:    andi a0, a0, 1
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vmsne.vi v25, v25, 0
@@ -212,17 +162,10 @@ define <16 x i1> @selectcc_v16i1(i1 signext %a, i1 signext %b, <16 x i1> %c, <16
 define <2 x i8> @select_v2i8(i1 zeroext %c, <2 x i8> %a, <2 x i8> %b) {
 ; CHECK-LABEL: select_v2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB10_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB10_2:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a1
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <2 x i8> %a, <2 x i8> %b
   ret <2 x i8> %v
@@ -231,17 +174,12 @@ define <2 x i8> @select_v2i8(i1 zeroext %c, <2 x i8> %a, <2 x i8> %b) {
 define <2 x i8> @selectcc_v2i8(i8 signext %a, i8 signext %b, <2 x i8> %c, <2 x i8> %d) {
 ; CHECK-LABEL: selectcc_v2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB11_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB11_2:
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a2
-; CHECK-NEXT:    vmv.v.x v26, a2
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i8 %a, %b
   %v = select i1 %cmp, <2 x i8> %c, <2 x i8> %d
@@ -251,17 +189,10 @@ define <2 x i8> @selectcc_v2i8(i8 signext %a, i8 signext %b, <2 x i8> %c, <2 x i
 define <4 x i8> @select_v4i8(i1 zeroext %c, <4 x i8> %a, <4 x i8> %b) {
 ; CHECK-LABEL: select_v4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB12_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB12_2:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a1
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <4 x i8> %a, <4 x i8> %b
   ret <4 x i8> %v
@@ -270,17 +201,12 @@ define <4 x i8> @select_v4i8(i1 zeroext %c, <4 x i8> %a, <4 x i8> %b) {
 define <4 x i8> @selectcc_v4i8(i8 signext %a, i8 signext %b, <4 x i8> %c, <4 x i8> %d) {
 ; CHECK-LABEL: selectcc_v4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB13_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB13_2:
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a2
-; CHECK-NEXT:    vmv.v.x v26, a2
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i8 %a, %b
   %v = select i1 %cmp, <4 x i8> %c, <4 x i8> %d
@@ -290,17 +216,10 @@ define <4 x i8> @selectcc_v4i8(i8 signext %a, i8 signext %b, <4 x i8> %c, <4 x i
 define <8 x i8> @select_v8i8(i1 zeroext %c, <8 x i8> %a, <8 x i8> %b) {
 ; CHECK-LABEL: select_v8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB14_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB14_2:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a1
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <8 x i8> %a, <8 x i8> %b
   ret <8 x i8> %v
@@ -309,17 +228,12 @@ define <8 x i8> @select_v8i8(i1 zeroext %c, <8 x i8> %a, <8 x i8> %b) {
 define <8 x i8> @selectcc_v8i8(i8 signext %a, i8 signext %b, <8 x i8> %c, <8 x i8> %d) {
 ; CHECK-LABEL: selectcc_v8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB15_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB15_2:
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a2
-; CHECK-NEXT:    vmv.v.x v26, a2
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i8 %a, %b
   %v = select i1 %cmp, <8 x i8> %c, <8 x i8> %d
@@ -329,17 +243,10 @@ define <8 x i8> @selectcc_v8i8(i8 signext %a, i8 signext %b, <8 x i8> %c, <8 x i
 define <16 x i8> @select_v16i8(i1 zeroext %c, <16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: select_v16i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB16_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB16_2:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a1
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <16 x i8> %a, <16 x i8> %b
   ret <16 x i8> %v
@@ -348,17 +255,12 @@ define <16 x i8> @select_v16i8(i1 zeroext %c, <16 x i8> %a, <16 x i8> %b) {
 define <16 x i8> @selectcc_v16i8(i8 signext %a, i8 signext %b, <16 x i8> %c, <16 x i8> %d) {
 ; CHECK-LABEL: selectcc_v16i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB17_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB17_2:
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a2
-; CHECK-NEXT:    vmv.v.x v26, a2
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i8 %a, %b
   %v = select i1 %cmp, <16 x i8> %c, <16 x i8> %d
@@ -368,17 +270,11 @@ define <16 x i8> @selectcc_v16i8(i8 signext %a, i8 signext %b, <16 x i8> %c, <16
 define <2 x i16> @select_v2i16(i1 zeroext %c, <2 x i16> %a, <2 x i16> %b) {
 ; CHECK-LABEL: select_v2i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB18_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB18_2:
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a1
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <2 x i16> %a, <2 x i16> %b
   ret <2 x i16> %v
@@ -387,17 +283,13 @@ define <2 x i16> @select_v2i16(i1 zeroext %c, <2 x i16> %a, <2 x i16> %b) {
 define <2 x i16> @selectcc_v2i16(i16 signext %a, i16 signext %b, <2 x i16> %c, <2 x i16> %d) {
 ; CHECK-LABEL: selectcc_v2i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB19_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB19_2:
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a2
-; CHECK-NEXT:    vmv.v.x v26, a2
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i16 %a, %b
   %v = select i1 %cmp, <2 x i16> %c, <2 x i16> %d
@@ -407,17 +299,11 @@ define <2 x i16> @selectcc_v2i16(i16 signext %a, i16 signext %b, <2 x i16> %c, <
 define <4 x i16> @select_v4i16(i1 zeroext %c, <4 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: select_v4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB20_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB20_2:
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a1
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <4 x i16> %a, <4 x i16> %b
   ret <4 x i16> %v
@@ -426,17 +312,13 @@ define <4 x i16> @select_v4i16(i1 zeroext %c, <4 x i16> %a, <4 x i16> %b) {
 define <4 x i16> @selectcc_v4i16(i16 signext %a, i16 signext %b, <4 x i16> %c, <4 x i16> %d) {
 ; CHECK-LABEL: selectcc_v4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB21_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB21_2:
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a2
-; CHECK-NEXT:    vmv.v.x v26, a2
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i16 %a, %b
   %v = select i1 %cmp, <4 x i16> %c, <4 x i16> %d
@@ -446,17 +328,11 @@ define <4 x i16> @selectcc_v4i16(i16 signext %a, i16 signext %b, <4 x i16> %c, <
 define <8 x i16> @select_v8i16(i1 zeroext %c, <8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: select_v8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB22_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB22_2:
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a1
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <8 x i16> %a, <8 x i16> %b
   ret <8 x i16> %v
@@ -465,17 +341,13 @@ define <8 x i16> @select_v8i16(i1 zeroext %c, <8 x i16> %a, <8 x i16> %b) {
 define <8 x i16> @selectcc_v8i16(i16 signext %a, i16 signext %b, <8 x i16> %c, <8 x i16> %d) {
 ; CHECK-LABEL: selectcc_v8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB23_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB23_2:
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a2
-; CHECK-NEXT:    vmv.v.x v26, a2
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i16 %a, %b
   %v = select i1 %cmp, <8 x i16> %c, <8 x i16> %d
@@ -485,17 +357,11 @@ define <8 x i16> @selectcc_v8i16(i16 signext %a, i16 signext %b, <8 x i16> %c, <
 define <16 x i16> @select_v16i16(i1 zeroext %c, <16 x i16> %a, <16 x i16> %b) {
 ; CHECK-LABEL: select_v16i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB24_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB24_2:
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; CHECK-NEXT:    vand.vx v26, v8, a1
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    vxor.vi v28, v28, -1
-; CHECK-NEXT:    vand.vv v28, v10, v28
-; CHECK-NEXT:    vor.vv v8, v26, v28
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <16 x i16> %a, <16 x i16> %b
   ret <16 x i16> %v
@@ -504,17 +370,13 @@ define <16 x i16> @select_v16i16(i1 zeroext %c, <16 x i16> %a, <16 x i16> %b) {
 define <16 x i16> @selectcc_v16i16(i16 signext %a, i16 signext %b, <16 x i16> %c, <16 x i16> %d) {
 ; CHECK-LABEL: selectcc_v16i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB25_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB25_2:
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; CHECK-NEXT:    vand.vx v26, v8, a2
-; CHECK-NEXT:    vmv.v.x v28, a2
-; CHECK-NEXT:    vxor.vi v28, v28, -1
-; CHECK-NEXT:    vand.vv v28, v10, v28
-; CHECK-NEXT:    vor.vv v8, v26, v28
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i16 %a, %b
   %v = select i1 %cmp, <16 x i16> %c, <16 x i16> %d
@@ -524,17 +386,11 @@ define <16 x i16> @selectcc_v16i16(i16 signext %a, i16 signext %b, <16 x i16> %c
 define <2 x i32> @select_v2i32(i1 zeroext %c, <2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: select_v2i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB26_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB26_2:
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a1
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <2 x i32> %a, <2 x i32> %b
   ret <2 x i32> %v
@@ -543,17 +399,13 @@ define <2 x i32> @select_v2i32(i1 zeroext %c, <2 x i32> %a, <2 x i32> %b) {
 define <2 x i32> @selectcc_v2i32(i32 signext %a, i32 signext %b, <2 x i32> %c, <2 x i32> %d) {
 ; CHECK-LABEL: selectcc_v2i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB27_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB27_2:
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a2
-; CHECK-NEXT:    vmv.v.x v26, a2
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i32 %a, %b
   %v = select i1 %cmp, <2 x i32> %c, <2 x i32> %d
@@ -563,17 +415,11 @@ define <2 x i32> @selectcc_v2i32(i32 signext %a, i32 signext %b, <2 x i32> %c, <
 define <4 x i32> @select_v4i32(i1 zeroext %c, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: select_v4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB28_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB28_2:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a1
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <4 x i32> %a, <4 x i32> %b
   ret <4 x i32> %v
@@ -582,17 +428,13 @@ define <4 x i32> @select_v4i32(i1 zeroext %c, <4 x i32> %a, <4 x i32> %b) {
 define <4 x i32> @selectcc_v4i32(i32 signext %a, i32 signext %b, <4 x i32> %c, <4 x i32> %d) {
 ; CHECK-LABEL: selectcc_v4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB29_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB29_2:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a2
-; CHECK-NEXT:    vmv.v.x v26, a2
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i32 %a, %b
   %v = select i1 %cmp, <4 x i32> %c, <4 x i32> %d
@@ -602,17 +444,11 @@ define <4 x i32> @selectcc_v4i32(i32 signext %a, i32 signext %b, <4 x i32> %c, <
 define <8 x i32> @select_v8i32(i1 zeroext %c, <8 x i32> %a, <8 x i32> %b) {
 ; CHECK-LABEL: select_v8i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB30_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB30_2:
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT:    vand.vx v26, v8, a1
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    vxor.vi v28, v28, -1
-; CHECK-NEXT:    vand.vv v28, v10, v28
-; CHECK-NEXT:    vor.vv v8, v26, v28
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <8 x i32> %a, <8 x i32> %b
   ret <8 x i32> %v
@@ -621,17 +457,13 @@ define <8 x i32> @select_v8i32(i1 zeroext %c, <8 x i32> %a, <8 x i32> %b) {
 define <8 x i32> @selectcc_v8i32(i32 signext %a, i32 signext %b, <8 x i32> %c, <8 x i32> %d) {
 ; CHECK-LABEL: selectcc_v8i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB31_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB31_2:
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT:    vand.vx v26, v8, a2
-; CHECK-NEXT:    vmv.v.x v28, a2
-; CHECK-NEXT:    vxor.vi v28, v28, -1
-; CHECK-NEXT:    vand.vv v28, v10, v28
-; CHECK-NEXT:    vor.vv v8, v26, v28
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i32 %a, %b
   %v = select i1 %cmp, <8 x i32> %c, <8 x i32> %d
@@ -641,17 +473,11 @@ define <8 x i32> @selectcc_v8i32(i32 signext %a, i32 signext %b, <8 x i32> %c, <
 define <16 x i32> @select_v16i32(i1 zeroext %c, <16 x i32> %a, <16 x i32> %b) {
 ; CHECK-LABEL: select_v16i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB32_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB32_2:
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; CHECK-NEXT:    vand.vx v28, v8, a1
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vxor.vi v8, v8, -1
-; CHECK-NEXT:    vand.vv v8, v12, v8
-; CHECK-NEXT:    vor.vv v8, v28, v8
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <16 x i32> %a, <16 x i32> %b
   ret <16 x i32> %v
@@ -660,17 +486,13 @@ define <16 x i32> @select_v16i32(i1 zeroext %c, <16 x i32> %a, <16 x i32> %b) {
 define <16 x i32> @selectcc_v16i32(i32 signext %a, i32 signext %b, <16 x i32> %c, <16 x i32> %d) {
 ; CHECK-LABEL: selectcc_v16i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB33_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB33_2:
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; CHECK-NEXT:    vand.vx v28, v8, a2
-; CHECK-NEXT:    vmv.v.x v8, a2
-; CHECK-NEXT:    vxor.vi v8, v8, -1
-; CHECK-NEXT:    vand.vv v8, v12, v8
-; CHECK-NEXT:    vor.vv v8, v28, v8
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i32 %a, %b
   %v = select i1 %cmp, <16 x i32> %c, <16 x i32> %d
@@ -678,39 +500,14 @@ define <16 x i32> @selectcc_v16i32(i32 signext %a, i32 signext %b, <16 x i32> %c
 }
 
 define <2 x i64> @select_v2i64(i1 zeroext %c, <2 x i64> %a, <2 x i64> %b) {
-; RV32-LABEL: select_v2i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi a1, zero, -1
-; RV32-NEXT:    bnez a0, .LBB34_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a1, zero
-; RV32-NEXT:  .LBB34_2:
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT:    vmv.v.x v25, a1
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; RV32-NEXT:    vand.vv v26, v8, v25
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT:    vmv.v.i v27, -1
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; RV32-NEXT:    vxor.vv v25, v25, v27
-; RV32-NEXT:    vand.vv v25, v9, v25
-; RV32-NEXT:    vor.vv v8, v26, v25
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: select_v2i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi a1, zero, -1
-; RV64-NEXT:    bnez a0, .LBB34_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a1, zero
-; RV64-NEXT:  .LBB34_2:
-; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; RV64-NEXT:    vand.vx v25, v8, a1
-; RV64-NEXT:    vmv.v.x v26, a1
-; RV64-NEXT:    vxor.vi v26, v26, -1
-; RV64-NEXT:    vand.vv v26, v9, v26
-; RV64-NEXT:    vor.vv v8, v25, v26
-; RV64-NEXT:    ret
+; CHECK-LABEL: select_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
   %v = select i1 %c, <2 x i64> %a, <2 x i64> %b
   ret <2 x i64> %v
 }
@@ -720,37 +517,24 @@ define <2 x i64> @selectcc_v2i64(i64 signext %a, i64 signext %b, <2 x i64> %c, <
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    xor a1, a1, a3
 ; RV32-NEXT:    xor a0, a0, a2
-; RV32-NEXT:    or a1, a0, a1
-; RV32-NEXT:    addi a0, zero, -1
-; RV32-NEXT:    bnez a1, .LBB35_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a0, zero
-; RV32-NEXT:  .LBB35_2:
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    snez a0, a0
+; RV32-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
 ; RV32-NEXT:    vmv.v.x v25, a0
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; RV32-NEXT:    vand.vv v26, v8, v25
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT:    vmv.v.i v27, -1
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; RV32-NEXT:    vxor.vv v25, v25, v27
-; RV32-NEXT:    vand.vv v25, v9, v25
-; RV32-NEXT:    vor.vv v8, v26, v25
+; RV32-NEXT:    vmsne.vi v0, v25, 0
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: selectcc_v2i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi a2, zero, -1
-; RV64-NEXT:    bne a0, a1, .LBB35_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a2, zero
-; RV64-NEXT:  .LBB35_2:
-; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; RV64-NEXT:    vand.vx v25, v8, a2
-; RV64-NEXT:    vmv.v.x v26, a2
-; RV64-NEXT:    vxor.vi v26, v26, -1
-; RV64-NEXT:    vand.vv v26, v9, v26
-; RV64-NEXT:    vor.vv v8, v25, v26
+; RV64-NEXT:    xor a0, a0, a1
+; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
+; RV64-NEXT:    vmv.v.x v25, a0
+; RV64-NEXT:    vmsne.vi v0, v25, 0
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; RV64-NEXT:    ret
   %cmp = icmp ne i64 %a, %b
   %v = select i1 %cmp, <2 x i64> %c, <2 x i64> %d
@@ -758,39 +542,14 @@ define <2 x i64> @selectcc_v2i64(i64 signext %a, i64 signext %b, <2 x i64> %c, <
 }
 
 define <4 x i64> @select_v4i64(i1 zeroext %c, <4 x i64> %a, <4 x i64> %b) {
-; RV32-LABEL: select_v4i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi a1, zero, -1
-; RV32-NEXT:    bnez a0, .LBB36_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a1, zero
-; RV32-NEXT:  .LBB36_2:
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; RV32-NEXT:    vmv.v.x v26, a1
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; RV32-NEXT:    vand.vv v28, v8, v26
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; RV32-NEXT:    vmv.v.i v30, -1
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; RV32-NEXT:    vxor.vv v26, v26, v30
-; RV32-NEXT:    vand.vv v26, v10, v26
-; RV32-NEXT:    vor.vv v8, v28, v26
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: select_v4i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi a1, zero, -1
-; RV64-NEXT:    bnez a0, .LBB36_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a1, zero
-; RV64-NEXT:  .LBB36_2:
-; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; RV64-NEXT:    vand.vx v26, v8, a1
-; RV64-NEXT:    vmv.v.x v28, a1
-; RV64-NEXT:    vxor.vi v28, v28, -1
-; RV64-NEXT:    vand.vv v28, v10, v28
-; RV64-NEXT:    vor.vv v8, v26, v28
-; RV64-NEXT:    ret
+; CHECK-LABEL: select_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    ret
   %v = select i1 %c, <4 x i64> %a, <4 x i64> %b
   ret <4 x i64> %v
 }
@@ -800,37 +559,24 @@ define <4 x i64> @selectcc_v4i64(i64 signext %a, i64 signext %b, <4 x i64> %c, <
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    xor a1, a1, a3
 ; RV32-NEXT:    xor a0, a0, a2
-; RV32-NEXT:    or a1, a0, a1
-; RV32-NEXT:    addi a0, zero, -1
-; RV32-NEXT:    bnez a1, .LBB37_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a0, zero
-; RV32-NEXT:  .LBB37_2:
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; RV32-NEXT:    vmv.v.x v26, a0
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; RV32-NEXT:    vand.vv v28, v8, v26
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; RV32-NEXT:    vmv.v.i v30, -1
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; RV32-NEXT:    vxor.vv v26, v26, v30
-; RV32-NEXT:    vand.vv v26, v10, v26
-; RV32-NEXT:    vor.vv v8, v28, v26
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    snez a0, a0
+; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; RV32-NEXT:    vmv.v.x v25, a0
+; RV32-NEXT:    vmsne.vi v0, v25, 0
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: selectcc_v4i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi a2, zero, -1
-; RV64-NEXT:    bne a0, a1, .LBB37_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a2, zero
-; RV64-NEXT:  .LBB37_2:
-; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; RV64-NEXT:    vand.vx v26, v8, a2
-; RV64-NEXT:    vmv.v.x v28, a2
-; RV64-NEXT:    vxor.vi v28, v28, -1
-; RV64-NEXT:    vand.vv v28, v10, v28
-; RV64-NEXT:    vor.vv v8, v26, v28
+; RV64-NEXT:    xor a0, a0, a1
+; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; RV64-NEXT:    vmv.v.x v25, a0
+; RV64-NEXT:    vmsne.vi v0, v25, 0
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; RV64-NEXT:    ret
   %cmp = icmp ne i64 %a, %b
   %v = select i1 %cmp, <4 x i64> %c, <4 x i64> %d
@@ -838,39 +584,14 @@ define <4 x i64> @selectcc_v4i64(i64 signext %a, i64 signext %b, <4 x i64> %c, <
 }
 
 define <8 x i64> @select_v8i64(i1 zeroext %c, <8 x i64> %a, <8 x i64> %b) {
-; RV32-LABEL: select_v8i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi a1, zero, -1
-; RV32-NEXT:    bnez a0, .LBB38_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a1, zero
-; RV32-NEXT:  .LBB38_2:
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; RV32-NEXT:    vmv.v.x v28, a1
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; RV32-NEXT:    vand.vv v8, v8, v28
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; RV32-NEXT:    vmv.v.i v16, -1
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; RV32-NEXT:    vxor.vv v28, v28, v16
-; RV32-NEXT:    vand.vv v28, v12, v28
-; RV32-NEXT:    vor.vv v8, v8, v28
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: select_v8i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi a1, zero, -1
-; RV64-NEXT:    bnez a0, .LBB38_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a1, zero
-; RV64-NEXT:  .LBB38_2:
-; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT:    vand.vx v28, v8, a1
-; RV64-NEXT:    vmv.v.x v8, a1
-; RV64-NEXT:    vxor.vi v8, v8, -1
-; RV64-NEXT:    vand.vv v8, v12, v8
-; RV64-NEXT:    vor.vv v8, v28, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: select_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT:    ret
   %v = select i1 %c, <8 x i64> %a, <8 x i64> %b
   ret <8 x i64> %v
 }
@@ -880,37 +601,24 @@ define <8 x i64> @selectcc_v8i64(i64 signext %a, i64 signext %b, <8 x i64> %c, <
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    xor a1, a1, a3
 ; RV32-NEXT:    xor a0, a0, a2
-; RV32-NEXT:    or a1, a0, a1
-; RV32-NEXT:    addi a0, zero, -1
-; RV32-NEXT:    bnez a1, .LBB39_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a0, zero
-; RV32-NEXT:  .LBB39_2:
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; RV32-NEXT:    vmv.v.x v28, a0
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; RV32-NEXT:    vand.vv v8, v8, v28
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; RV32-NEXT:    vmv.v.i v16, -1
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; RV32-NEXT:    vxor.vv v28, v28, v16
-; RV32-NEXT:    vand.vv v28, v12, v28
-; RV32-NEXT:    vor.vv v8, v8, v28
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    snez a0, a0
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; RV32-NEXT:    vmv.v.x v25, a0
+; RV32-NEXT:    vmsne.vi v0, v25, 0
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: selectcc_v8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi a2, zero, -1
-; RV64-NEXT:    bne a0, a1, .LBB39_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a2, zero
-; RV64-NEXT:  .LBB39_2:
-; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT:    vand.vx v28, v8, a2
-; RV64-NEXT:    vmv.v.x v8, a2
-; RV64-NEXT:    vxor.vi v8, v8, -1
-; RV64-NEXT:    vand.vv v8, v12, v8
-; RV64-NEXT:    vor.vv v8, v28, v8
+; RV64-NEXT:    xor a0, a0, a1
+; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; RV64-NEXT:    vmv.v.x v25, a0
+; RV64-NEXT:    vmsne.vi v0, v25, 0
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; RV64-NEXT:    ret
   %cmp = icmp ne i64 %a, %b
   %v = select i1 %cmp, <8 x i64> %c, <8 x i64> %d
@@ -918,40 +626,14 @@ define <8 x i64> @selectcc_v8i64(i64 signext %a, i64 signext %b, <8 x i64> %c, <
 }
 
 define <16 x i64> @select_v16i64(i1 zeroext %c, <16 x i64> %a, <16 x i64> %b) {
-; RV32-LABEL: select_v16i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi a1, zero, -1
-; RV32-NEXT:    bnez a0, .LBB40_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a1, zero
-; RV32-NEXT:  .LBB40_2:
-; RV32-NEXT:    addi a0, zero, 32
-; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; RV32-NEXT:    vmv.v.x v24, a1
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
-; RV32-NEXT:    vand.vv v8, v8, v24
-; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; RV32-NEXT:    vmv.v.i v0, -1
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
-; RV32-NEXT:    vxor.vv v24, v24, v0
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: select_v16i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi a1, zero, -1
-; RV64-NEXT:    bnez a0, .LBB40_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a1, zero
-; RV64-NEXT:  .LBB40_2:
-; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
-; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    vmv.v.x v24, a1
-; RV64-NEXT:    vxor.vi v24, v24, -1
-; RV64-NEXT:    vand.vv v16, v16, v24
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    ret
+; CHECK-LABEL: select_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    ret
   %v = select i1 %c, <16 x i64> %a, <16 x i64> %b
   ret <16 x i64> %v
 }
@@ -961,38 +643,24 @@ define <16 x i64> @selectcc_v16i64(i64 signext %a, i64 signext %b, <16 x i64> %c
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    xor a1, a1, a3
 ; RV32-NEXT:    xor a0, a0, a2
-; RV32-NEXT:    or a1, a0, a1
-; RV32-NEXT:    addi a0, zero, -1
-; RV32-NEXT:    bnez a1, .LBB41_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a0, zero
-; RV32-NEXT:  .LBB41_2:
-; RV32-NEXT:    addi a1, zero, 32
-; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; RV32-NEXT:    vmv.v.x v24, a0
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
-; RV32-NEXT:    vand.vv v8, v8, v24
-; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; RV32-NEXT:    vmv.v.i v0, -1
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
-; RV32-NEXT:    vxor.vv v24, v24, v0
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vor.vv v8, v8, v16
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    snez a0, a0
+; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; RV32-NEXT:    vmv.v.x v25, a0
+; RV32-NEXT:    vmsne.vi v0, v25, 0
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: selectcc_v16i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi a2, zero, -1
-; RV64-NEXT:    bne a0, a1, .LBB41_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a2, zero
-; RV64-NEXT:  .LBB41_2:
-; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
-; RV64-NEXT:    vand.vx v8, v8, a2
-; RV64-NEXT:    vmv.v.x v24, a2
-; RV64-NEXT:    vxor.vi v24, v24, -1
-; RV64-NEXT:    vand.vv v16, v16, v24
-; RV64-NEXT:    vor.vv v8, v8, v16
+; RV64-NEXT:    xor a0, a0, a1
+; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; RV64-NEXT:    vmv.v.x v25, a0
+; RV64-NEXT:    vmsne.vi v0, v25, 0
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; RV64-NEXT:    ret
   %cmp = icmp ne i64 %a, %b
   %v = select i1 %cmp, <16 x i64> %c, <16 x i64> %d

diff  --git a/llvm/test/CodeGen/RISCV/rvv/select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/select-fp.ll
index 1888ba04f52d2..ed49db889f817 100644
--- a/llvm/test/CodeGen/RISCV/rvv/select-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/select-fp.ll
@@ -1,23 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN:     -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x half> @select_nxv1f16(i1 zeroext %c, <vscale x 1 x half> %a, <vscale x 1 x half> %b) {
 ; CHECK-LABEL: select_nxv1f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB0_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB0_2:
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a1
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 1 x half> %a, <vscale x 1 x half> %b
   ret <vscale x 1 x half> %v
@@ -26,18 +20,12 @@ define <vscale x 1 x half> @select_nxv1f16(i1 zeroext %c, <vscale x 1 x half> %a
 define <vscale x 1 x half> @selectcc_nxv1f16(half %a, half %b, <vscale x 1 x half> %c, <vscale x 1 x half> %d) {
 ; CHECK-LABEL: selectcc_nxv1f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    feq.h a1, fa0, fa1
-; CHECK-NEXT:    addi a0, zero, -1
-; CHECK-NEXT:    bnez a1, .LBB1_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB1_2:
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a0
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    feq.h a0, fa0, fa1
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = fcmp oeq half %a, %b
   %v = select i1 %cmp, <vscale x 1 x half> %c, <vscale x 1 x half> %d
@@ -47,17 +35,11 @@ define <vscale x 1 x half> @selectcc_nxv1f16(half %a, half %b, <vscale x 1 x hal
 define <vscale x 2 x half> @select_nxv2f16(i1 zeroext %c, <vscale x 2 x half> %a, <vscale x 2 x half> %b) {
 ; CHECK-LABEL: select_nxv2f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB2_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB2_2:
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a1
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 2 x half> %a, <vscale x 2 x half> %b
   ret <vscale x 2 x half> %v
@@ -66,18 +48,12 @@ define <vscale x 2 x half> @select_nxv2f16(i1 zeroext %c, <vscale x 2 x half> %a
 define <vscale x 2 x half> @selectcc_nxv2f16(half %a, half %b, <vscale x 2 x half> %c, <vscale x 2 x half> %d) {
 ; CHECK-LABEL: selectcc_nxv2f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    feq.h a1, fa0, fa1
-; CHECK-NEXT:    addi a0, zero, -1
-; CHECK-NEXT:    bnez a1, .LBB3_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB3_2:
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a0
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    feq.h a0, fa0, fa1
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = fcmp oeq half %a, %b
   %v = select i1 %cmp, <vscale x 2 x half> %c, <vscale x 2 x half> %d
@@ -87,17 +63,11 @@ define <vscale x 2 x half> @selectcc_nxv2f16(half %a, half %b, <vscale x 2 x hal
 define <vscale x 4 x half> @select_nxv4f16(i1 zeroext %c, <vscale x 4 x half> %a, <vscale x 4 x half> %b) {
 ; CHECK-LABEL: select_nxv4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB4_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB4_2:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a1
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 4 x half> %a, <vscale x 4 x half> %b
   ret <vscale x 4 x half> %v
@@ -106,18 +76,12 @@ define <vscale x 4 x half> @select_nxv4f16(i1 zeroext %c, <vscale x 4 x half> %a
 define <vscale x 4 x half> @selectcc_nxv4f16(half %a, half %b, <vscale x 4 x half> %c, <vscale x 4 x half> %d) {
 ; CHECK-LABEL: selectcc_nxv4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    feq.h a1, fa0, fa1
-; CHECK-NEXT:    addi a0, zero, -1
-; CHECK-NEXT:    bnez a1, .LBB5_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB5_2:
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a0
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    feq.h a0, fa0, fa1
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = fcmp oeq half %a, %b
   %v = select i1 %cmp, <vscale x 4 x half> %c, <vscale x 4 x half> %d
@@ -127,17 +91,11 @@ define <vscale x 4 x half> @selectcc_nxv4f16(half %a, half %b, <vscale x 4 x hal
 define <vscale x 8 x half> @select_nxv8f16(i1 zeroext %c, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: select_nxv8f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB6_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB6_2:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vand.vx v26, v8, a1
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    vxor.vi v28, v28, -1
-; CHECK-NEXT:    vand.vv v28, v10, v28
-; CHECK-NEXT:    vor.vv v8, v26, v28
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 8 x half> %a, <vscale x 8 x half> %b
   ret <vscale x 8 x half> %v
@@ -146,18 +104,12 @@ define <vscale x 8 x half> @select_nxv8f16(i1 zeroext %c, <vscale x 8 x half> %a
 define <vscale x 8 x half> @selectcc_nxv8f16(half %a, half %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d) {
 ; CHECK-LABEL: selectcc_nxv8f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    feq.h a1, fa0, fa1
-; CHECK-NEXT:    addi a0, zero, -1
-; CHECK-NEXT:    bnez a1, .LBB7_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB7_2:
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vand.vx v26, v8, a0
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vxor.vi v28, v28, -1
-; CHECK-NEXT:    vand.vv v28, v10, v28
-; CHECK-NEXT:    vor.vv v8, v26, v28
+; CHECK-NEXT:    feq.h a0, fa0, fa1
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = fcmp oeq half %a, %b
   %v = select i1 %cmp, <vscale x 8 x half> %c, <vscale x 8 x half> %d
@@ -167,17 +119,11 @@ define <vscale x 8 x half> @selectcc_nxv8f16(half %a, half %b, <vscale x 8 x hal
 define <vscale x 16 x half> @select_nxv16f16(i1 zeroext %c, <vscale x 16 x half> %a, <vscale x 16 x half> %b) {
 ; CHECK-LABEL: select_nxv16f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB8_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB8_2:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vand.vx v28, v8, a1
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vxor.vi v8, v8, -1
-; CHECK-NEXT:    vand.vv v8, v12, v8
-; CHECK-NEXT:    vor.vv v8, v28, v8
+; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
+; CHECK-NEXT:    vmv.v.x v26, a0
+; CHECK-NEXT:    vmsne.vi v0, v26, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 16 x half> %a, <vscale x 16 x half> %b
   ret <vscale x 16 x half> %v
@@ -186,18 +132,12 @@ define <vscale x 16 x half> @select_nxv16f16(i1 zeroext %c, <vscale x 16 x half>
 define <vscale x 16 x half> @selectcc_nxv16f16(half %a, half %b, <vscale x 16 x half> %c, <vscale x 16 x half> %d) {
 ; CHECK-LABEL: selectcc_nxv16f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    feq.h a1, fa0, fa1
-; CHECK-NEXT:    addi a0, zero, -1
-; CHECK-NEXT:    bnez a1, .LBB9_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB9_2:
-; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vand.vx v28, v8, a0
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vxor.vi v8, v8, -1
-; CHECK-NEXT:    vand.vv v8, v12, v8
-; CHECK-NEXT:    vor.vv v8, v28, v8
+; CHECK-NEXT:    feq.h a0, fa0, fa1
+; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
+; CHECK-NEXT:    vmv.v.x v26, a0
+; CHECK-NEXT:    vmsne.vi v0, v26, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = fcmp oeq half %a, %b
   %v = select i1 %cmp, <vscale x 16 x half> %c, <vscale x 16 x half> %d
@@ -207,17 +147,11 @@ define <vscale x 16 x half> @selectcc_nxv16f16(half %a, half %b, <vscale x 16 x
 define <vscale x 32 x half> @select_nxv32f16(i1 zeroext %c, <vscale x 32 x half> %a, <vscale x 32 x half> %b) {
 ; CHECK-LABEL: select_nxv32f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB10_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB10_2:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
-; CHECK-NEXT:    vand.vx v8, v8, a1
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    vxor.vi v24, v24, -1
-; CHECK-NEXT:    vand.vv v16, v16, v24
-; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, mu
+; CHECK-NEXT:    vmv.v.x v28, a0
+; CHECK-NEXT:    vmsne.vi v0, v28, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 32 x half> %a, <vscale x 32 x half> %b
   ret <vscale x 32 x half> %v
@@ -226,18 +160,12 @@ define <vscale x 32 x half> @select_nxv32f16(i1 zeroext %c, <vscale x 32 x half>
 define <vscale x 32 x half> @selectcc_nxv32f16(half %a, half %b, <vscale x 32 x half> %c, <vscale x 32 x half> %d) {
 ; CHECK-LABEL: selectcc_nxv32f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    feq.h a1, fa0, fa1
-; CHECK-NEXT:    addi a0, zero, -1
-; CHECK-NEXT:    bnez a1, .LBB11_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB11_2:
-; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, mu
-; CHECK-NEXT:    vand.vx v8, v8, a0
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vxor.vi v24, v24, -1
-; CHECK-NEXT:    vand.vv v16, v16, v24
-; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    feq.h a0, fa0, fa1
+; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, mu
+; CHECK-NEXT:    vmv.v.x v28, a0
+; CHECK-NEXT:    vmsne.vi v0, v28, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = fcmp oeq half %a, %b
   %v = select i1 %cmp, <vscale x 32 x half> %c, <vscale x 32 x half> %d
@@ -247,17 +175,11 @@ define <vscale x 32 x half> @selectcc_nxv32f16(half %a, half %b, <vscale x 32 x
 define <vscale x 1 x float> @select_nxv1f32(i1 zeroext %c, <vscale x 1 x float> %a, <vscale x 1 x float> %b) {
 ; CHECK-LABEL: select_nxv1f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB12_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB12_2:
-; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a1
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 1 x float> %a, <vscale x 1 x float> %b
   ret <vscale x 1 x float> %v
@@ -266,18 +188,12 @@ define <vscale x 1 x float> @select_nxv1f32(i1 zeroext %c, <vscale x 1 x float>
 define <vscale x 1 x float> @selectcc_nxv1f32(float %a, float %b, <vscale x 1 x float> %c, <vscale x 1 x float> %d) {
 ; CHECK-LABEL: selectcc_nxv1f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    feq.s a1, fa0, fa1
-; CHECK-NEXT:    addi a0, zero, -1
-; CHECK-NEXT:    bnez a1, .LBB13_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB13_2:
-; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a0
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    feq.s a0, fa0, fa1
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = fcmp oeq float %a, %b
   %v = select i1 %cmp, <vscale x 1 x float> %c, <vscale x 1 x float> %d
@@ -287,17 +203,11 @@ define <vscale x 1 x float> @selectcc_nxv1f32(float %a, float %b, <vscale x 1 x
 define <vscale x 2 x float> @select_nxv2f32(i1 zeroext %c, <vscale x 2 x float> %a, <vscale x 2 x float> %b) {
 ; CHECK-LABEL: select_nxv2f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB14_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB14_2:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a1
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 2 x float> %a, <vscale x 2 x float> %b
   ret <vscale x 2 x float> %v
@@ -306,18 +216,12 @@ define <vscale x 2 x float> @select_nxv2f32(i1 zeroext %c, <vscale x 2 x float>
 define <vscale x 2 x float> @selectcc_nxv2f32(float %a, float %b, <vscale x 2 x float> %c, <vscale x 2 x float> %d) {
 ; CHECK-LABEL: selectcc_nxv2f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    feq.s a1, fa0, fa1
-; CHECK-NEXT:    addi a0, zero, -1
-; CHECK-NEXT:    bnez a1, .LBB15_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB15_2:
-; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a0
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    feq.s a0, fa0, fa1
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = fcmp oeq float %a, %b
   %v = select i1 %cmp, <vscale x 2 x float> %c, <vscale x 2 x float> %d
@@ -327,17 +231,11 @@ define <vscale x 2 x float> @selectcc_nxv2f32(float %a, float %b, <vscale x 2 x
 define <vscale x 4 x float> @select_nxv4f32(i1 zeroext %c, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: select_nxv4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB16_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB16_2:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vand.vx v26, v8, a1
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    vxor.vi v28, v28, -1
-; CHECK-NEXT:    vand.vv v28, v10, v28
-; CHECK-NEXT:    vor.vv v8, v26, v28
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 4 x float> %a, <vscale x 4 x float> %b
   ret <vscale x 4 x float> %v
@@ -346,18 +244,12 @@ define <vscale x 4 x float> @select_nxv4f32(i1 zeroext %c, <vscale x 4 x float>
 define <vscale x 4 x float> @selectcc_nxv4f32(float %a, float %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d) {
 ; CHECK-LABEL: selectcc_nxv4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    feq.s a1, fa0, fa1
-; CHECK-NEXT:    addi a0, zero, -1
-; CHECK-NEXT:    bnez a1, .LBB17_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB17_2:
-; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vand.vx v26, v8, a0
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vxor.vi v28, v28, -1
-; CHECK-NEXT:    vand.vv v28, v10, v28
-; CHECK-NEXT:    vor.vv v8, v26, v28
+; CHECK-NEXT:    feq.s a0, fa0, fa1
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = fcmp oeq float %a, %b
   %v = select i1 %cmp, <vscale x 4 x float> %c, <vscale x 4 x float> %d
@@ -367,17 +259,11 @@ define <vscale x 4 x float> @selectcc_nxv4f32(float %a, float %b, <vscale x 4 x
 define <vscale x 8 x float> @select_nxv8f32(i1 zeroext %c, <vscale x 8 x float> %a, <vscale x 8 x float> %b) {
 ; CHECK-LABEL: select_nxv8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB18_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB18_2:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vand.vx v28, v8, a1
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vxor.vi v8, v8, -1
-; CHECK-NEXT:    vand.vv v8, v12, v8
-; CHECK-NEXT:    vor.vv v8, v28, v8
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 8 x float> %a, <vscale x 8 x float> %b
   ret <vscale x 8 x float> %v
@@ -386,18 +272,12 @@ define <vscale x 8 x float> @select_nxv8f32(i1 zeroext %c, <vscale x 8 x float>
 define <vscale x 8 x float> @selectcc_nxv8f32(float %a, float %b, <vscale x 8 x float> %c, <vscale x 8 x float> %d) {
 ; CHECK-LABEL: selectcc_nxv8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    feq.s a1, fa0, fa1
-; CHECK-NEXT:    addi a0, zero, -1
-; CHECK-NEXT:    bnez a1, .LBB19_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB19_2:
-; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vand.vx v28, v8, a0
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vxor.vi v8, v8, -1
-; CHECK-NEXT:    vand.vv v8, v12, v8
-; CHECK-NEXT:    vor.vv v8, v28, v8
+; CHECK-NEXT:    feq.s a0, fa0, fa1
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = fcmp oeq float %a, %b
   %v = select i1 %cmp, <vscale x 8 x float> %c, <vscale x 8 x float> %d
@@ -407,17 +287,11 @@ define <vscale x 8 x float> @selectcc_nxv8f32(float %a, float %b, <vscale x 8 x
 define <vscale x 16 x float> @select_nxv16f32(i1 zeroext %c, <vscale x 16 x float> %a, <vscale x 16 x float> %b) {
 ; CHECK-LABEL: select_nxv16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB20_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB20_2:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vand.vx v8, v8, a1
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    vxor.vi v24, v24, -1
-; CHECK-NEXT:    vand.vv v16, v16, v24
-; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
+; CHECK-NEXT:    vmv.v.x v26, a0
+; CHECK-NEXT:    vmsne.vi v0, v26, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 16 x float> %a, <vscale x 16 x float> %b
   ret <vscale x 16 x float> %v
@@ -426,18 +300,12 @@ define <vscale x 16 x float> @select_nxv16f32(i1 zeroext %c, <vscale x 16 x floa
 define <vscale x 16 x float> @selectcc_nxv16f32(float %a, float %b, <vscale x 16 x float> %c, <vscale x 16 x float> %d) {
 ; CHECK-LABEL: selectcc_nxv16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    feq.s a1, fa0, fa1
-; CHECK-NEXT:    addi a0, zero, -1
-; CHECK-NEXT:    bnez a1, .LBB21_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB21_2:
-; CHECK-NEXT:    vsetvli a1, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vand.vx v8, v8, a0
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vxor.vi v24, v24, -1
-; CHECK-NEXT:    vand.vv v16, v16, v24
-; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    feq.s a0, fa0, fa1
+; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
+; CHECK-NEXT:    vmv.v.x v26, a0
+; CHECK-NEXT:    vmsne.vi v0, v26, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = fcmp oeq float %a, %b
   %v = select i1 %cmp, <vscale x 16 x float> %c, <vscale x 16 x float> %d
@@ -445,332 +313,112 @@ define <vscale x 16 x float> @selectcc_nxv16f32(float %a, float %b, <vscale x 16
 }
 
 define <vscale x 1 x double> @select_nxv1f64(i1 zeroext %c, <vscale x 1 x double> %a, <vscale x 1 x double> %b) {
-; RV32-LABEL: select_nxv1f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    addi a1, zero, -1
-; RV32-NEXT:    bnez a0, .LBB22_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a1, zero
-; RV32-NEXT:  .LBB22_2:
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v25, (a0), zero
-; RV32-NEXT:    vand.vv v26, v8, v25
-; RV32-NEXT:    vxor.vi v25, v25, -1
-; RV32-NEXT:    vand.vv v25, v9, v25
-; RV32-NEXT:    vor.vv v8, v26, v25
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: select_nxv1f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi a1, zero, -1
-; RV64-NEXT:    bnez a0, .LBB22_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a1, zero
-; RV64-NEXT:  .LBB22_2:
-; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
-; RV64-NEXT:    vand.vx v25, v8, a1
-; RV64-NEXT:    vmv.v.x v26, a1
-; RV64-NEXT:    vxor.vi v26, v26, -1
-; RV64-NEXT:    vand.vv v26, v9, v26
-; RV64-NEXT:    vor.vv v8, v25, v26
-; RV64-NEXT:    ret
+; CHECK-LABEL: select_nxv1f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 1 x double> %a, <vscale x 1 x double> %b
   ret <vscale x 1 x double> %v
 }
 
 define <vscale x 1 x double> @selectcc_nxv1f64(double %a, double %b, <vscale x 1 x double> %c, <vscale x 1 x double> %d) {
-; RV32-LABEL: selectcc_nxv1f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    feq.d a1, fa0, fa1
-; RV32-NEXT:    addi a0, zero, -1
-; RV32-NEXT:    bnez a1, .LBB23_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a0, zero
-; RV32-NEXT:  .LBB23_2:
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v25, (a0), zero
-; RV32-NEXT:    vand.vv v26, v8, v25
-; RV32-NEXT:    vxor.vi v25, v25, -1
-; RV32-NEXT:    vand.vv v25, v9, v25
-; RV32-NEXT:    vor.vv v8, v26, v25
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: selectcc_nxv1f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    feq.d a1, fa0, fa1
-; RV64-NEXT:    addi a0, zero, -1
-; RV64-NEXT:    bnez a1, .LBB23_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a0, zero
-; RV64-NEXT:  .LBB23_2:
-; RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
-; RV64-NEXT:    vand.vx v25, v8, a0
-; RV64-NEXT:    vmv.v.x v26, a0
-; RV64-NEXT:    vxor.vi v26, v26, -1
-; RV64-NEXT:    vand.vv v26, v9, v26
-; RV64-NEXT:    vor.vv v8, v25, v26
-; RV64-NEXT:    ret
+; CHECK-LABEL: selectcc_nxv1f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    feq.d a0, fa0, fa1
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
   %cmp = fcmp oeq double %a, %b
   %v = select i1 %cmp, <vscale x 1 x double> %c, <vscale x 1 x double> %d
   ret <vscale x 1 x double> %v
 }
 
 define <vscale x 2 x double> @select_nxv2f64(i1 zeroext %c, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
-; RV32-LABEL: select_nxv2f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    addi a1, zero, -1
-; RV32-NEXT:    bnez a0, .LBB24_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a1, zero
-; RV32-NEXT:  .LBB24_2:
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v26, (a0), zero
-; RV32-NEXT:    vand.vv v28, v8, v26
-; RV32-NEXT:    vxor.vi v26, v26, -1
-; RV32-NEXT:    vand.vv v26, v10, v26
-; RV32-NEXT:    vor.vv v8, v28, v26
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: select_nxv2f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi a1, zero, -1
-; RV64-NEXT:    bnez a0, .LBB24_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a1, zero
-; RV64-NEXT:  .LBB24_2:
-; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; RV64-NEXT:    vand.vx v26, v8, a1
-; RV64-NEXT:    vmv.v.x v28, a1
-; RV64-NEXT:    vxor.vi v28, v28, -1
-; RV64-NEXT:    vand.vv v28, v10, v28
-; RV64-NEXT:    vor.vv v8, v26, v28
-; RV64-NEXT:    ret
+; CHECK-LABEL: select_nxv2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 2 x double> %a, <vscale x 2 x double> %b
   ret <vscale x 2 x double> %v
 }
 
 define <vscale x 2 x double> @selectcc_nxv2f64(double %a, double %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d) {
-; RV32-LABEL: selectcc_nxv2f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    feq.d a1, fa0, fa1
-; RV32-NEXT:    addi a0, zero, -1
-; RV32-NEXT:    bnez a1, .LBB25_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a0, zero
-; RV32-NEXT:  .LBB25_2:
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v26, (a0), zero
-; RV32-NEXT:    vand.vv v28, v8, v26
-; RV32-NEXT:    vxor.vi v26, v26, -1
-; RV32-NEXT:    vand.vv v26, v10, v26
-; RV32-NEXT:    vor.vv v8, v28, v26
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: selectcc_nxv2f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    feq.d a1, fa0, fa1
-; RV64-NEXT:    addi a0, zero, -1
-; RV64-NEXT:    bnez a1, .LBB25_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a0, zero
-; RV64-NEXT:  .LBB25_2:
-; RV64-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
-; RV64-NEXT:    vand.vx v26, v8, a0
-; RV64-NEXT:    vmv.v.x v28, a0
-; RV64-NEXT:    vxor.vi v28, v28, -1
-; RV64-NEXT:    vand.vv v28, v10, v28
-; RV64-NEXT:    vor.vv v8, v26, v28
-; RV64-NEXT:    ret
+; CHECK-LABEL: selectcc_nxv2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    feq.d a0, fa0, fa1
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    ret
   %cmp = fcmp oeq double %a, %b
   %v = select i1 %cmp, <vscale x 2 x double> %c, <vscale x 2 x double> %d
   ret <vscale x 2 x double> %v
 }
 
 define <vscale x 4 x double> @select_nxv4f64(i1 zeroext %c, <vscale x 4 x double> %a, <vscale x 4 x double> %b) {
-; RV32-LABEL: select_nxv4f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    addi a1, zero, -1
-; RV32-NEXT:    bnez a0, .LBB26_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a1, zero
-; RV32-NEXT:  .LBB26_2:
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v28, (a0), zero
-; RV32-NEXT:    vand.vv v8, v8, v28
-; RV32-NEXT:    vxor.vi v28, v28, -1
-; RV32-NEXT:    vand.vv v28, v12, v28
-; RV32-NEXT:    vor.vv v8, v8, v28
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: select_nxv4f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi a1, zero, -1
-; RV64-NEXT:    bnez a0, .LBB26_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a1, zero
-; RV64-NEXT:  .LBB26_2:
-; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; RV64-NEXT:    vand.vx v28, v8, a1
-; RV64-NEXT:    vmv.v.x v8, a1
-; RV64-NEXT:    vxor.vi v8, v8, -1
-; RV64-NEXT:    vand.vv v8, v12, v8
-; RV64-NEXT:    vor.vv v8, v28, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: select_nxv4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 4 x double> %a, <vscale x 4 x double> %b
   ret <vscale x 4 x double> %v
 }
 
 define <vscale x 4 x double> @selectcc_nxv4f64(double %a, double %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
-; RV32-LABEL: selectcc_nxv4f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    feq.d a1, fa0, fa1
-; RV32-NEXT:    addi a0, zero, -1
-; RV32-NEXT:    bnez a1, .LBB27_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a0, zero
-; RV32-NEXT:  .LBB27_2:
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v28, (a0), zero
-; RV32-NEXT:    vand.vv v8, v8, v28
-; RV32-NEXT:    vxor.vi v28, v28, -1
-; RV32-NEXT:    vand.vv v28, v12, v28
-; RV32-NEXT:    vor.vv v8, v8, v28
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: selectcc_nxv4f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    feq.d a1, fa0, fa1
-; RV64-NEXT:    addi a0, zero, -1
-; RV64-NEXT:    bnez a1, .LBB27_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a0, zero
-; RV64-NEXT:  .LBB27_2:
-; RV64-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
-; RV64-NEXT:    vand.vx v28, v8, a0
-; RV64-NEXT:    vmv.v.x v8, a0
-; RV64-NEXT:    vxor.vi v8, v8, -1
-; RV64-NEXT:    vand.vv v8, v12, v8
-; RV64-NEXT:    vor.vv v8, v28, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: selectcc_nxv4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    feq.d a0, fa0, fa1
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT:    ret
   %cmp = fcmp oeq double %a, %b
   %v = select i1 %cmp, <vscale x 4 x double> %c, <vscale x 4 x double> %d
   ret <vscale x 4 x double> %v
 }
 
 define <vscale x 8 x double> @select_nxv8f64(i1 zeroext %c, <vscale x 8 x double> %a, <vscale x 8 x double> %b) {
-; RV32-LABEL: select_nxv8f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    addi a1, zero, -1
-; RV32-NEXT:    bnez a0, .LBB28_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a1, zero
-; RV32-NEXT:  .LBB28_2:
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vand.vv v8, v8, v24
-; RV32-NEXT:    vxor.vi v24, v24, -1
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: select_nxv8f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi a1, zero, -1
-; RV64-NEXT:    bnez a0, .LBB28_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a1, zero
-; RV64-NEXT:  .LBB28_2:
-; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    vmv.v.x v24, a1
-; RV64-NEXT:    vxor.vi v24, v24, -1
-; RV64-NEXT:    vand.vv v16, v16, v24
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    ret
+; CHECK-LABEL: select_nxv8f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 8 x double> %a, <vscale x 8 x double> %b
   ret <vscale x 8 x double> %v
 }
 
 define <vscale x 8 x double> @selectcc_nxv8f64(double %a, double %b, <vscale x 8 x double> %c, <vscale x 8 x double> %d) {
-; RV32-LABEL: selectcc_nxv8f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    feq.d a1, fa0, fa1
-; RV32-NEXT:    addi a0, zero, -1
-; RV32-NEXT:    bnez a1, .LBB29_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a0, zero
-; RV32-NEXT:  .LBB29_2:
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vand.vv v8, v8, v24
-; RV32-NEXT:    vxor.vi v24, v24, -1
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: selectcc_nxv8f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    feq.d a1, fa0, fa1
-; RV64-NEXT:    addi a0, zero, -1
-; RV64-NEXT:    bnez a1, .LBB29_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a0, zero
-; RV64-NEXT:  .LBB29_2:
-; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vmv.v.x v24, a0
-; RV64-NEXT:    vxor.vi v24, v24, -1
-; RV64-NEXT:    vand.vv v16, v16, v24
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    ret
+; CHECK-LABEL: selectcc_nxv8f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    feq.d a0, fa0, fa1
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    ret
   %cmp = fcmp oeq double %a, %b
   %v = select i1 %cmp, <vscale x 8 x double> %c, <vscale x 8 x double> %d
   ret <vscale x 8 x double> %v

diff  --git a/llvm/test/CodeGen/RISCV/rvv/select-int.ll b/llvm/test/CodeGen/RISCV/rvv/select-int.ll
index 70f782bbcec39..52fd26c19be55 100644
--- a/llvm/test/CodeGen/RISCV/rvv/select-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/select-int.ll
@@ -7,13 +7,8 @@
 define <vscale x 1 x i1> @select_nxv1i1(i1 zeroext %c, <vscale x 1 x i1> %a, <vscale x 1 x i1> %b) {
 ; CHECK-LABEL: select_nxv1i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, 1
-; CHECK-NEXT:    bnez a0, .LBB0_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB0_2:
-; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
-; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vmsne.vi v25, v25, 0
 ; CHECK-NEXT:    vmandnot.mm v26, v8, v25
 ; CHECK-NEXT:    vmand.mm v25, v0, v25
@@ -27,12 +22,7 @@ define <vscale x 1 x i1> @selectcc_nxv1i1(i1 signext %a, i1 signext %b, <vscale
 ; CHECK-LABEL: selectcc_nxv1i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xor a0, a0, a1
-; CHECK-NEXT:    andi a1, a0, 1
-; CHECK-NEXT:    addi a0, zero, 1
-; CHECK-NEXT:    bnez a1, .LBB1_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB1_2:
+; CHECK-NEXT:    andi a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vmsne.vi v25, v25, 0
@@ -48,13 +38,8 @@ define <vscale x 1 x i1> @selectcc_nxv1i1(i1 signext %a, i1 signext %b, <vscale
 define <vscale x 2 x i1> @select_nxv2i1(i1 zeroext %c, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: select_nxv2i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, 1
-; CHECK-NEXT:    bnez a0, .LBB2_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB2_2:
-; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vmsne.vi v25, v25, 0
 ; CHECK-NEXT:    vmandnot.mm v26, v8, v25
 ; CHECK-NEXT:    vmand.mm v25, v0, v25
@@ -68,12 +53,7 @@ define <vscale x 2 x i1> @selectcc_nxv2i1(i1 signext %a, i1 signext %b, <vscale
 ; CHECK-LABEL: selectcc_nxv2i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xor a0, a0, a1
-; CHECK-NEXT:    andi a1, a0, 1
-; CHECK-NEXT:    addi a0, zero, 1
-; CHECK-NEXT:    bnez a1, .LBB3_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB3_2:
+; CHECK-NEXT:    andi a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vmsne.vi v25, v25, 0
@@ -89,13 +69,8 @@ define <vscale x 2 x i1> @selectcc_nxv2i1(i1 signext %a, i1 signext %b, <vscale
 define <vscale x 4 x i1> @select_nxv4i1(i1 zeroext %c, <vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: select_nxv4i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, 1
-; CHECK-NEXT:    bnez a0, .LBB4_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB4_2:
-; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
-; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vmsne.vi v25, v25, 0
 ; CHECK-NEXT:    vmandnot.mm v26, v8, v25
 ; CHECK-NEXT:    vmand.mm v25, v0, v25
@@ -109,12 +84,7 @@ define <vscale x 4 x i1> @selectcc_nxv4i1(i1 signext %a, i1 signext %b, <vscale
 ; CHECK-LABEL: selectcc_nxv4i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xor a0, a0, a1
-; CHECK-NEXT:    andi a1, a0, 1
-; CHECK-NEXT:    addi a0, zero, 1
-; CHECK-NEXT:    bnez a1, .LBB5_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB5_2:
+; CHECK-NEXT:    andi a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vmsne.vi v25, v25, 0
@@ -130,13 +100,8 @@ define <vscale x 4 x i1> @selectcc_nxv4i1(i1 signext %a, i1 signext %b, <vscale
 define <vscale x 8 x i1> @select_nxv8i1(i1 zeroext %c, <vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: select_nxv8i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, 1
-; CHECK-NEXT:    bnez a0, .LBB6_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB6_2:
-; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
-; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vmsne.vi v25, v25, 0
 ; CHECK-NEXT:    vmandnot.mm v26, v8, v25
 ; CHECK-NEXT:    vmand.mm v25, v0, v25
@@ -150,12 +115,7 @@ define <vscale x 8 x i1> @selectcc_nxv8i1(i1 signext %a, i1 signext %b, <vscale
 ; CHECK-LABEL: selectcc_nxv8i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xor a0, a0, a1
-; CHECK-NEXT:    andi a1, a0, 1
-; CHECK-NEXT:    addi a0, zero, 1
-; CHECK-NEXT:    bnez a1, .LBB7_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB7_2:
+; CHECK-NEXT:    andi a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vmsne.vi v25, v25, 0
@@ -171,13 +131,8 @@ define <vscale x 8 x i1> @selectcc_nxv8i1(i1 signext %a, i1 signext %b, <vscale
 define <vscale x 16 x i1> @select_nxv16i1(i1 zeroext %c, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: select_nxv16i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, 1
-; CHECK-NEXT:    bnez a0, .LBB8_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB8_2:
-; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
-; CHECK-NEXT:    vmv.v.x v26, a1
+; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
+; CHECK-NEXT:    vmv.v.x v26, a0
 ; CHECK-NEXT:    vmsne.vi v25, v26, 0
 ; CHECK-NEXT:    vmandnot.mm v26, v8, v25
 ; CHECK-NEXT:    vmand.mm v25, v0, v25
@@ -191,12 +146,7 @@ define <vscale x 16 x i1> @selectcc_nxv16i1(i1 signext %a, i1 signext %b, <vscal
 ; CHECK-LABEL: selectcc_nxv16i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xor a0, a0, a1
-; CHECK-NEXT:    andi a1, a0, 1
-; CHECK-NEXT:    addi a0, zero, 1
-; CHECK-NEXT:    bnez a1, .LBB9_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB9_2:
+; CHECK-NEXT:    andi a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
 ; CHECK-NEXT:    vmv.v.x v26, a0
 ; CHECK-NEXT:    vmsne.vi v25, v26, 0
@@ -212,13 +162,8 @@ define <vscale x 16 x i1> @selectcc_nxv16i1(i1 signext %a, i1 signext %b, <vscal
 define <vscale x 32 x i1> @select_nxv32i1(i1 zeroext %c, <vscale x 32 x i1> %a, <vscale x 32 x i1> %b) {
 ; CHECK-LABEL: select_nxv32i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, 1
-; CHECK-NEXT:    bnez a0, .LBB10_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB10_2:
-; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, mu
-; CHECK-NEXT:    vmv.v.x v28, a1
+; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, mu
+; CHECK-NEXT:    vmv.v.x v28, a0
 ; CHECK-NEXT:    vmsne.vi v25, v28, 0
 ; CHECK-NEXT:    vmandnot.mm v26, v8, v25
 ; CHECK-NEXT:    vmand.mm v25, v0, v25
@@ -232,12 +177,7 @@ define <vscale x 32 x i1> @selectcc_nxv32i1(i1 signext %a, i1 signext %b, <vscal
 ; CHECK-LABEL: selectcc_nxv32i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xor a0, a0, a1
-; CHECK-NEXT:    andi a1, a0, 1
-; CHECK-NEXT:    addi a0, zero, 1
-; CHECK-NEXT:    bnez a1, .LBB11_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB11_2:
+; CHECK-NEXT:    andi a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, mu
 ; CHECK-NEXT:    vmv.v.x v28, a0
 ; CHECK-NEXT:    vmsne.vi v25, v28, 0
@@ -253,13 +193,8 @@ define <vscale x 32 x i1> @selectcc_nxv32i1(i1 signext %a, i1 signext %b, <vscal
 define <vscale x 64 x i1> @select_nxv64i1(i1 zeroext %c, <vscale x 64 x i1> %a, <vscale x 64 x i1> %b) {
 ; CHECK-LABEL: select_nxv64i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, 1
-; CHECK-NEXT:    bnez a0, .LBB12_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB12_2:
-; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, mu
-; CHECK-NEXT:    vmv.v.x v16, a1
+; CHECK-NEXT:    vsetvli a1, zero, e8, m8, ta, mu
+; CHECK-NEXT:    vmv.v.x v16, a0
 ; CHECK-NEXT:    vmsne.vi v25, v16, 0
 ; CHECK-NEXT:    vmandnot.mm v26, v8, v25
 ; CHECK-NEXT:    vmand.mm v25, v0, v25
@@ -273,12 +208,7 @@ define <vscale x 64 x i1> @selectcc_nxv64i1(i1 signext %a, i1 signext %b, <vscal
 ; CHECK-LABEL: selectcc_nxv64i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xor a0, a0, a1
-; CHECK-NEXT:    andi a1, a0, 1
-; CHECK-NEXT:    addi a0, zero, 1
-; CHECK-NEXT:    bnez a1, .LBB13_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB13_2:
+; CHECK-NEXT:    andi a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m8, ta, mu
 ; CHECK-NEXT:    vmv.v.x v16, a0
 ; CHECK-NEXT:    vmsne.vi v25, v16, 0
@@ -294,17 +224,10 @@ define <vscale x 64 x i1> @selectcc_nxv64i1(i1 signext %a, i1 signext %b, <vscal
 define <vscale x 1 x i8> @select_nxv1i8(i1 zeroext %c, <vscale x 1 x i8> %a, <vscale x 1 x i8> %b) {
 ; CHECK-LABEL: select_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB14_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB14_2:
-; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a1
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 1 x i8> %a, <vscale x 1 x i8> %b
   ret <vscale x 1 x i8> %v
@@ -313,17 +236,12 @@ define <vscale x 1 x i8> @select_nxv1i8(i1 zeroext %c, <vscale x 1 x i8> %a, <vs
 define <vscale x 1 x i8> @selectcc_nxv1i8(i8 signext %a, i8 signext %b, <vscale x 1 x i8> %c, <vscale x 1 x i8> %d) {
 ; CHECK-LABEL: selectcc_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB15_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB15_2:
-; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a2
-; CHECK-NEXT:    vmv.v.x v26, a2
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i8 %a, %b
   %v = select i1 %cmp, <vscale x 1 x i8> %c, <vscale x 1 x i8> %d
@@ -333,17 +251,10 @@ define <vscale x 1 x i8> @selectcc_nxv1i8(i8 signext %a, i8 signext %b, <vscale
 define <vscale x 2 x i8> @select_nxv2i8(i1 zeroext %c, <vscale x 2 x i8> %a, <vscale x 2 x i8> %b) {
 ; CHECK-LABEL: select_nxv2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB16_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB16_2:
-; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a1
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 2 x i8> %a, <vscale x 2 x i8> %b
   ret <vscale x 2 x i8> %v
@@ -352,17 +263,12 @@ define <vscale x 2 x i8> @select_nxv2i8(i1 zeroext %c, <vscale x 2 x i8> %a, <vs
 define <vscale x 2 x i8> @selectcc_nxv2i8(i8 signext %a, i8 signext %b, <vscale x 2 x i8> %c, <vscale x 2 x i8> %d) {
 ; CHECK-LABEL: selectcc_nxv2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB17_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB17_2:
-; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a2
-; CHECK-NEXT:    vmv.v.x v26, a2
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i8 %a, %b
   %v = select i1 %cmp, <vscale x 2 x i8> %c, <vscale x 2 x i8> %d
@@ -372,17 +278,10 @@ define <vscale x 2 x i8> @selectcc_nxv2i8(i8 signext %a, i8 signext %b, <vscale
 define <vscale x 4 x i8> @select_nxv4i8(i1 zeroext %c, <vscale x 4 x i8> %a, <vscale x 4 x i8> %b) {
 ; CHECK-LABEL: select_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB18_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB18_2:
-; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a1
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 4 x i8> %a, <vscale x 4 x i8> %b
   ret <vscale x 4 x i8> %v
@@ -391,17 +290,12 @@ define <vscale x 4 x i8> @select_nxv4i8(i1 zeroext %c, <vscale x 4 x i8> %a, <vs
 define <vscale x 4 x i8> @selectcc_nxv4i8(i8 signext %a, i8 signext %b, <vscale x 4 x i8> %c, <vscale x 4 x i8> %d) {
 ; CHECK-LABEL: selectcc_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB19_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB19_2:
-; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a2
-; CHECK-NEXT:    vmv.v.x v26, a2
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i8 %a, %b
   %v = select i1 %cmp, <vscale x 4 x i8> %c, <vscale x 4 x i8> %d
@@ -411,17 +305,10 @@ define <vscale x 4 x i8> @selectcc_nxv4i8(i8 signext %a, i8 signext %b, <vscale
 define <vscale x 8 x i8> @select_nxv8i8(i1 zeroext %c, <vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
 ; CHECK-LABEL: select_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB20_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB20_2:
-; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a1
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 8 x i8> %a, <vscale x 8 x i8> %b
   ret <vscale x 8 x i8> %v
@@ -430,17 +317,12 @@ define <vscale x 8 x i8> @select_nxv8i8(i1 zeroext %c, <vscale x 8 x i8> %a, <vs
 define <vscale x 8 x i8> @selectcc_nxv8i8(i8 signext %a, i8 signext %b, <vscale x 8 x i8> %c, <vscale x 8 x i8> %d) {
 ; CHECK-LABEL: selectcc_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB21_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB21_2:
-; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a2
-; CHECK-NEXT:    vmv.v.x v26, a2
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i8 %a, %b
   %v = select i1 %cmp, <vscale x 8 x i8> %c, <vscale x 8 x i8> %d
@@ -450,17 +332,10 @@ define <vscale x 8 x i8> @selectcc_nxv8i8(i8 signext %a, i8 signext %b, <vscale
 define <vscale x 16 x i8> @select_nxv16i8(i1 zeroext %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: select_nxv16i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB22_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB22_2:
-; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
-; CHECK-NEXT:    vand.vx v26, v8, a1
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    vxor.vi v28, v28, -1
-; CHECK-NEXT:    vand.vv v28, v10, v28
-; CHECK-NEXT:    vor.vv v8, v26, v28
+; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
+; CHECK-NEXT:    vmv.v.x v26, a0
+; CHECK-NEXT:    vmsne.vi v0, v26, 0
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b
   ret <vscale x 16 x i8> %v
@@ -469,17 +344,12 @@ define <vscale x 16 x i8> @select_nxv16i8(i1 zeroext %c, <vscale x 16 x i8> %a,
 define <vscale x 16 x i8> @selectcc_nxv16i8(i8 signext %a, i8 signext %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) {
 ; CHECK-LABEL: selectcc_nxv16i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB23_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB23_2:
-; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
-; CHECK-NEXT:    vand.vx v26, v8, a2
-; CHECK-NEXT:    vmv.v.x v28, a2
-; CHECK-NEXT:    vxor.vi v28, v28, -1
-; CHECK-NEXT:    vand.vv v28, v10, v28
-; CHECK-NEXT:    vor.vv v8, v26, v28
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
+; CHECK-NEXT:    vmv.v.x v26, a0
+; CHECK-NEXT:    vmsne.vi v0, v26, 0
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i8 %a, %b
   %v = select i1 %cmp, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d
@@ -489,17 +359,10 @@ define <vscale x 16 x i8> @selectcc_nxv16i8(i8 signext %a, i8 signext %b, <vscal
 define <vscale x 32 x i8> @select_nxv32i8(i1 zeroext %c, <vscale x 32 x i8> %a, <vscale x 32 x i8> %b) {
 ; CHECK-LABEL: select_nxv32i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB24_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB24_2:
-; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, mu
-; CHECK-NEXT:    vand.vx v28, v8, a1
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vxor.vi v8, v8, -1
-; CHECK-NEXT:    vand.vv v8, v12, v8
-; CHECK-NEXT:    vor.vv v8, v28, v8
+; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, mu
+; CHECK-NEXT:    vmv.v.x v28, a0
+; CHECK-NEXT:    vmsne.vi v0, v28, 0
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 32 x i8> %a, <vscale x 32 x i8> %b
   ret <vscale x 32 x i8> %v
@@ -508,17 +371,12 @@ define <vscale x 32 x i8> @select_nxv32i8(i1 zeroext %c, <vscale x 32 x i8> %a,
 define <vscale x 32 x i8> @selectcc_nxv32i8(i8 signext %a, i8 signext %b, <vscale x 32 x i8> %c, <vscale x 32 x i8> %d) {
 ; CHECK-LABEL: selectcc_nxv32i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB25_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB25_2:
-; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, mu
-; CHECK-NEXT:    vand.vx v28, v8, a2
-; CHECK-NEXT:    vmv.v.x v8, a2
-; CHECK-NEXT:    vxor.vi v8, v8, -1
-; CHECK-NEXT:    vand.vv v8, v12, v8
-; CHECK-NEXT:    vor.vv v8, v28, v8
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, mu
+; CHECK-NEXT:    vmv.v.x v28, a0
+; CHECK-NEXT:    vmsne.vi v0, v28, 0
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i8 %a, %b
   %v = select i1 %cmp, <vscale x 32 x i8> %c, <vscale x 32 x i8> %d
@@ -528,17 +386,10 @@ define <vscale x 32 x i8> @selectcc_nxv32i8(i8 signext %a, i8 signext %b, <vscal
 define <vscale x 64 x i8> @select_nxv64i8(i1 zeroext %c, <vscale x 64 x i8> %a, <vscale x 64 x i8> %b) {
 ; CHECK-LABEL: select_nxv64i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB26_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB26_2:
-; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, mu
-; CHECK-NEXT:    vand.vx v8, v8, a1
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    vxor.vi v24, v24, -1
-; CHECK-NEXT:    vand.vv v16, v16, v24
-; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a1, zero, e8, m8, ta, mu
+; CHECK-NEXT:    vmv.v.x v24, a0
+; CHECK-NEXT:    vmsne.vi v0, v24, 0
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 64 x i8> %a, <vscale x 64 x i8> %b
   ret <vscale x 64 x i8> %v
@@ -547,17 +398,12 @@ define <vscale x 64 x i8> @select_nxv64i8(i1 zeroext %c, <vscale x 64 x i8> %a,
 define <vscale x 64 x i8> @selectcc_nxv64i8(i8 signext %a, i8 signext %b, <vscale x 64 x i8> %c, <vscale x 64 x i8> %d) {
 ; CHECK-LABEL: selectcc_nxv64i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB27_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB27_2:
-; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, mu
-; CHECK-NEXT:    vand.vx v8, v8, a2
-; CHECK-NEXT:    vmv.v.x v24, a2
-; CHECK-NEXT:    vxor.vi v24, v24, -1
-; CHECK-NEXT:    vand.vv v16, v16, v24
-; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetvli a1, zero, e8, m8, ta, mu
+; CHECK-NEXT:    vmv.v.x v24, a0
+; CHECK-NEXT:    vmsne.vi v0, v24, 0
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i8 %a, %b
   %v = select i1 %cmp, <vscale x 64 x i8> %c, <vscale x 64 x i8> %d
@@ -567,17 +413,11 @@ define <vscale x 64 x i8> @selectcc_nxv64i8(i8 signext %a, i8 signext %b, <vscal
 define <vscale x 1 x i16> @select_nxv1i16(i1 zeroext %c, <vscale x 1 x i16> %a, <vscale x 1 x i16> %b) {
 ; CHECK-LABEL: select_nxv1i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB28_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB28_2:
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a1
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 1 x i16> %a, <vscale x 1 x i16> %b
   ret <vscale x 1 x i16> %v
@@ -586,17 +426,13 @@ define <vscale x 1 x i16> @select_nxv1i16(i1 zeroext %c, <vscale x 1 x i16> %a,
 define <vscale x 1 x i16> @selectcc_nxv1i16(i16 signext %a, i16 signext %b, <vscale x 1 x i16> %c, <vscale x 1 x i16> %d) {
 ; CHECK-LABEL: selectcc_nxv1i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB29_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB29_2:
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a2
-; CHECK-NEXT:    vmv.v.x v26, a2
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i16 %a, %b
   %v = select i1 %cmp, <vscale x 1 x i16> %c, <vscale x 1 x i16> %d
@@ -606,17 +442,11 @@ define <vscale x 1 x i16> @selectcc_nxv1i16(i16 signext %a, i16 signext %b, <vsc
 define <vscale x 2 x i16> @select_nxv2i16(i1 zeroext %c, <vscale x 2 x i16> %a, <vscale x 2 x i16> %b) {
 ; CHECK-LABEL: select_nxv2i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB30_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB30_2:
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a1
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 2 x i16> %a, <vscale x 2 x i16> %b
   ret <vscale x 2 x i16> %v
@@ -625,17 +455,13 @@ define <vscale x 2 x i16> @select_nxv2i16(i1 zeroext %c, <vscale x 2 x i16> %a,
 define <vscale x 2 x i16> @selectcc_nxv2i16(i16 signext %a, i16 signext %b, <vscale x 2 x i16> %c, <vscale x 2 x i16> %d) {
 ; CHECK-LABEL: selectcc_nxv2i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB31_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB31_2:
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a2
-; CHECK-NEXT:    vmv.v.x v26, a2
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i16 %a, %b
   %v = select i1 %cmp, <vscale x 2 x i16> %c, <vscale x 2 x i16> %d
@@ -645,17 +471,11 @@ define <vscale x 2 x i16> @selectcc_nxv2i16(i16 signext %a, i16 signext %b, <vsc
 define <vscale x 4 x i16> @select_nxv4i16(i1 zeroext %c, <vscale x 4 x i16> %a, <vscale x 4 x i16> %b) {
 ; CHECK-LABEL: select_nxv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB32_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB32_2:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a1
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 4 x i16> %a, <vscale x 4 x i16> %b
   ret <vscale x 4 x i16> %v
@@ -664,17 +484,13 @@ define <vscale x 4 x i16> @select_nxv4i16(i1 zeroext %c, <vscale x 4 x i16> %a,
 define <vscale x 4 x i16> @selectcc_nxv4i16(i16 signext %a, i16 signext %b, <vscale x 4 x i16> %c, <vscale x 4 x i16> %d) {
 ; CHECK-LABEL: selectcc_nxv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB33_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB33_2:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a2
-; CHECK-NEXT:    vmv.v.x v26, a2
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i16 %a, %b
   %v = select i1 %cmp, <vscale x 4 x i16> %c, <vscale x 4 x i16> %d
@@ -684,17 +500,11 @@ define <vscale x 4 x i16> @selectcc_nxv4i16(i16 signext %a, i16 signext %b, <vsc
 define <vscale x 8 x i16> @select_nxv8i16(i1 zeroext %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: select_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB34_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB34_2:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vand.vx v26, v8, a1
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    vxor.vi v28, v28, -1
-; CHECK-NEXT:    vand.vv v28, v10, v28
-; CHECK-NEXT:    vor.vv v8, v26, v28
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b
   ret <vscale x 8 x i16> %v
@@ -703,17 +513,13 @@ define <vscale x 8 x i16> @select_nxv8i16(i1 zeroext %c, <vscale x 8 x i16> %a,
 define <vscale x 8 x i16> @selectcc_nxv8i16(i16 signext %a, i16 signext %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d) {
 ; CHECK-LABEL: selectcc_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB35_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB35_2:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vand.vx v26, v8, a2
-; CHECK-NEXT:    vmv.v.x v28, a2
-; CHECK-NEXT:    vxor.vi v28, v28, -1
-; CHECK-NEXT:    vand.vv v28, v10, v28
-; CHECK-NEXT:    vor.vv v8, v26, v28
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i16 %a, %b
   %v = select i1 %cmp, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d
@@ -723,17 +529,11 @@ define <vscale x 8 x i16> @selectcc_nxv8i16(i16 signext %a, i16 signext %b, <vsc
 define <vscale x 16 x i16> @select_nxv16i16(i1 zeroext %c, <vscale x 16 x i16> %a, <vscale x 16 x i16> %b) {
 ; CHECK-LABEL: select_nxv16i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB36_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB36_2:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vand.vx v28, v8, a1
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vxor.vi v8, v8, -1
-; CHECK-NEXT:    vand.vv v8, v12, v8
-; CHECK-NEXT:    vor.vv v8, v28, v8
+; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
+; CHECK-NEXT:    vmv.v.x v26, a0
+; CHECK-NEXT:    vmsne.vi v0, v26, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 16 x i16> %a, <vscale x 16 x i16> %b
   ret <vscale x 16 x i16> %v
@@ -742,17 +542,13 @@ define <vscale x 16 x i16> @select_nxv16i16(i1 zeroext %c, <vscale x 16 x i16> %
 define <vscale x 16 x i16> @selectcc_nxv16i16(i16 signext %a, i16 signext %b, <vscale x 16 x i16> %c, <vscale x 16 x i16> %d) {
 ; CHECK-LABEL: selectcc_nxv16i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB37_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB37_2:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vand.vx v28, v8, a2
-; CHECK-NEXT:    vmv.v.x v8, a2
-; CHECK-NEXT:    vxor.vi v8, v8, -1
-; CHECK-NEXT:    vand.vv v8, v12, v8
-; CHECK-NEXT:    vor.vv v8, v28, v8
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
+; CHECK-NEXT:    vmv.v.x v26, a0
+; CHECK-NEXT:    vmsne.vi v0, v26, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i16 %a, %b
   %v = select i1 %cmp, <vscale x 16 x i16> %c, <vscale x 16 x i16> %d
@@ -762,17 +558,11 @@ define <vscale x 16 x i16> @selectcc_nxv16i16(i16 signext %a, i16 signext %b, <v
 define <vscale x 32 x i16> @select_nxv32i16(i1 zeroext %c, <vscale x 32 x i16> %a, <vscale x 32 x i16> %b) {
 ; CHECK-LABEL: select_nxv32i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB38_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB38_2:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
-; CHECK-NEXT:    vand.vx v8, v8, a1
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    vxor.vi v24, v24, -1
-; CHECK-NEXT:    vand.vv v16, v16, v24
-; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, mu
+; CHECK-NEXT:    vmv.v.x v28, a0
+; CHECK-NEXT:    vmsne.vi v0, v28, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 32 x i16> %a, <vscale x 32 x i16> %b
   ret <vscale x 32 x i16> %v
@@ -781,17 +571,13 @@ define <vscale x 32 x i16> @select_nxv32i16(i1 zeroext %c, <vscale x 32 x i16> %
 define <vscale x 32 x i16> @selectcc_nxv32i16(i16 signext %a, i16 signext %b, <vscale x 32 x i16> %c, <vscale x 32 x i16> %d) {
 ; CHECK-LABEL: selectcc_nxv32i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB39_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB39_2:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
-; CHECK-NEXT:    vand.vx v8, v8, a2
-; CHECK-NEXT:    vmv.v.x v24, a2
-; CHECK-NEXT:    vxor.vi v24, v24, -1
-; CHECK-NEXT:    vand.vv v16, v16, v24
-; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, mu
+; CHECK-NEXT:    vmv.v.x v28, a0
+; CHECK-NEXT:    vmsne.vi v0, v28, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i16 %a, %b
   %v = select i1 %cmp, <vscale x 32 x i16> %c, <vscale x 32 x i16> %d
@@ -801,17 +587,11 @@ define <vscale x 32 x i16> @selectcc_nxv32i16(i16 signext %a, i16 signext %b, <v
 define <vscale x 1 x i32> @select_nxv1i32(i1 zeroext %c, <vscale x 1 x i32> %a, <vscale x 1 x i32> %b) {
 ; CHECK-LABEL: select_nxv1i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB40_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB40_2:
-; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a1
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 1 x i32> %a, <vscale x 1 x i32> %b
   ret <vscale x 1 x i32> %v
@@ -820,17 +600,13 @@ define <vscale x 1 x i32> @select_nxv1i32(i1 zeroext %c, <vscale x 1 x i32> %a,
 define <vscale x 1 x i32> @selectcc_nxv1i32(i32 signext %a, i32 signext %b, <vscale x 1 x i32> %c, <vscale x 1 x i32> %d) {
 ; CHECK-LABEL: selectcc_nxv1i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB41_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB41_2:
-; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a2
-; CHECK-NEXT:    vmv.v.x v26, a2
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i32 %a, %b
   %v = select i1 %cmp, <vscale x 1 x i32> %c, <vscale x 1 x i32> %d
@@ -840,17 +616,11 @@ define <vscale x 1 x i32> @selectcc_nxv1i32(i32 signext %a, i32 signext %b, <vsc
 define <vscale x 2 x i32> @select_nxv2i32(i1 zeroext %c, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: select_nxv2i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB42_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB42_2:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a1
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b
   ret <vscale x 2 x i32> %v
@@ -859,17 +629,13 @@ define <vscale x 2 x i32> @select_nxv2i32(i1 zeroext %c, <vscale x 2 x i32> %a,
 define <vscale x 2 x i32> @selectcc_nxv2i32(i32 signext %a, i32 signext %b, <vscale x 2 x i32> %c, <vscale x 2 x i32> %d) {
 ; CHECK-LABEL: selectcc_nxv2i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB43_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB43_2:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vand.vx v25, v8, a2
-; CHECK-NEXT:    vmv.v.x v26, a2
-; CHECK-NEXT:    vxor.vi v26, v26, -1
-; CHECK-NEXT:    vand.vv v26, v9, v26
-; CHECK-NEXT:    vor.vv v8, v25, v26
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i32 %a, %b
   %v = select i1 %cmp, <vscale x 2 x i32> %c, <vscale x 2 x i32> %d
@@ -879,17 +645,11 @@ define <vscale x 2 x i32> @selectcc_nxv2i32(i32 signext %a, i32 signext %b, <vsc
 define <vscale x 4 x i32> @select_nxv4i32(i1 zeroext %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: select_nxv4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB44_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB44_2:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vand.vx v26, v8, a1
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    vxor.vi v28, v28, -1
-; CHECK-NEXT:    vand.vv v28, v10, v28
-; CHECK-NEXT:    vor.vv v8, v26, v28
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b
   ret <vscale x 4 x i32> %v
@@ -898,17 +658,13 @@ define <vscale x 4 x i32> @select_nxv4i32(i1 zeroext %c, <vscale x 4 x i32> %a,
 define <vscale x 4 x i32> @selectcc_nxv4i32(i32 signext %a, i32 signext %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d) {
 ; CHECK-LABEL: selectcc_nxv4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB45_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB45_2:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vand.vx v26, v8, a2
-; CHECK-NEXT:    vmv.v.x v28, a2
-; CHECK-NEXT:    vxor.vi v28, v28, -1
-; CHECK-NEXT:    vand.vv v28, v10, v28
-; CHECK-NEXT:    vor.vv v8, v26, v28
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i32 %a, %b
   %v = select i1 %cmp, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d
@@ -918,17 +674,11 @@ define <vscale x 4 x i32> @selectcc_nxv4i32(i32 signext %a, i32 signext %b, <vsc
 define <vscale x 8 x i32> @select_nxv8i32(i1 zeroext %c, <vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
 ; CHECK-LABEL: select_nxv8i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB46_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB46_2:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vand.vx v28, v8, a1
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vxor.vi v8, v8, -1
-; CHECK-NEXT:    vand.vv v8, v12, v8
-; CHECK-NEXT:    vor.vv v8, v28, v8
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 8 x i32> %a, <vscale x 8 x i32> %b
   ret <vscale x 8 x i32> %v
@@ -937,17 +687,13 @@ define <vscale x 8 x i32> @select_nxv8i32(i1 zeroext %c, <vscale x 8 x i32> %a,
 define <vscale x 8 x i32> @selectcc_nxv8i32(i32 signext %a, i32 signext %b, <vscale x 8 x i32> %c, <vscale x 8 x i32> %d) {
 ; CHECK-LABEL: selectcc_nxv8i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB47_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB47_2:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vand.vx v28, v8, a2
-; CHECK-NEXT:    vmv.v.x v8, a2
-; CHECK-NEXT:    vxor.vi v8, v8, -1
-; CHECK-NEXT:    vand.vv v8, v12, v8
-; CHECK-NEXT:    vor.vv v8, v28, v8
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i32 %a, %b
   %v = select i1 %cmp, <vscale x 8 x i32> %c, <vscale x 8 x i32> %d
@@ -957,17 +703,11 @@ define <vscale x 8 x i32> @selectcc_nxv8i32(i32 signext %a, i32 signext %b, <vsc
 define <vscale x 16 x i32> @select_nxv16i32(i1 zeroext %c, <vscale x 16 x i32> %a, <vscale x 16 x i32> %b) {
 ; CHECK-LABEL: select_nxv16i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB48_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB48_2:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vand.vx v8, v8, a1
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    vxor.vi v24, v24, -1
-; CHECK-NEXT:    vand.vv v16, v16, v24
-; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
+; CHECK-NEXT:    vmv.v.x v26, a0
+; CHECK-NEXT:    vmsne.vi v0, v26, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 16 x i32> %a, <vscale x 16 x i32> %b
   ret <vscale x 16 x i32> %v
@@ -976,17 +716,13 @@ define <vscale x 16 x i32> @select_nxv16i32(i1 zeroext %c, <vscale x 16 x i32> %
 define <vscale x 16 x i32> @selectcc_nxv16i32(i32 signext %a, i32 signext %b, <vscale x 16 x i32> %c, <vscale x 16 x i32> %d) {
 ; CHECK-LABEL: selectcc_nxv16i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a2, zero, -1
-; CHECK-NEXT:    bne a0, a1, .LBB49_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a2, zero
-; CHECK-NEXT:  .LBB49_2:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vand.vx v8, v8, a2
-; CHECK-NEXT:    vmv.v.x v24, a2
-; CHECK-NEXT:    vxor.vi v24, v24, -1
-; CHECK-NEXT:    vand.vv v16, v16, v24
-; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
+; CHECK-NEXT:    vmv.v.x v26, a0
+; CHECK-NEXT:    vmsne.vi v0, v26, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i32 %a, %b
   %v = select i1 %cmp, <vscale x 16 x i32> %c, <vscale x 16 x i32> %d
@@ -994,41 +730,14 @@ define <vscale x 16 x i32> @selectcc_nxv16i32(i32 signext %a, i32 signext %b, <v
 }
 
 define <vscale x 1 x i64> @select_nxv1i64(i1 zeroext %c, <vscale x 1 x i64> %a, <vscale x 1 x i64> %b) {
-; RV32-LABEL: select_nxv1i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    addi a1, zero, -1
-; RV32-NEXT:    bnez a0, .LBB50_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a1, zero
-; RV32-NEXT:  .LBB50_2:
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v25, (a0), zero
-; RV32-NEXT:    vand.vv v26, v8, v25
-; RV32-NEXT:    vxor.vi v25, v25, -1
-; RV32-NEXT:    vand.vv v25, v9, v25
-; RV32-NEXT:    vor.vv v8, v26, v25
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: select_nxv1i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi a1, zero, -1
-; RV64-NEXT:    bnez a0, .LBB50_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a1, zero
-; RV64-NEXT:  .LBB50_2:
-; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
-; RV64-NEXT:    vand.vx v25, v8, a1
-; RV64-NEXT:    vmv.v.x v26, a1
-; RV64-NEXT:    vxor.vi v26, v26, -1
-; RV64-NEXT:    vand.vv v26, v9, v26
-; RV64-NEXT:    vor.vv v8, v25, v26
-; RV64-NEXT:    ret
+; CHECK-LABEL: select_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 1 x i64> %a, <vscale x 1 x i64> %b
   ret <vscale x 1 x i64> %v
 }
@@ -1036,41 +745,26 @@ define <vscale x 1 x i64> @select_nxv1i64(i1 zeroext %c, <vscale x 1 x i64> %a,
 define <vscale x 1 x i64> @selectcc_nxv1i64(i64 signext %a, i64 signext %b, <vscale x 1 x i64> %c, <vscale x 1 x i64> %d) {
 ; RV32-LABEL: selectcc_nxv1i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    xor a1, a1, a3
 ; RV32-NEXT:    xor a0, a0, a2
-; RV32-NEXT:    or a1, a0, a1
-; RV32-NEXT:    addi a0, zero, -1
-; RV32-NEXT:    bnez a1, .LBB51_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a0, zero
-; RV32-NEXT:  .LBB51_2:
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v25, (a0), zero
-; RV32-NEXT:    vand.vv v26, v8, v25
-; RV32-NEXT:    vxor.vi v25, v25, -1
-; RV32-NEXT:    vand.vv v25, v9, v25
-; RV32-NEXT:    vor.vv v8, v26, v25
-; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    snez a0, a0
+; RV32-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
+; RV32-NEXT:    vmv.v.x v25, a0
+; RV32-NEXT:    vmsne.vi v0, v25, 0
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: selectcc_nxv1i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi a2, zero, -1
-; RV64-NEXT:    bne a0, a1, .LBB51_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a2, zero
-; RV64-NEXT:  .LBB51_2:
-; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
-; RV64-NEXT:    vand.vx v25, v8, a2
-; RV64-NEXT:    vmv.v.x v26, a2
-; RV64-NEXT:    vxor.vi v26, v26, -1
-; RV64-NEXT:    vand.vv v26, v9, v26
-; RV64-NEXT:    vor.vv v8, v25, v26
+; RV64-NEXT:    xor a0, a0, a1
+; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
+; RV64-NEXT:    vmv.v.x v25, a0
+; RV64-NEXT:    vmsne.vi v0, v25, 0
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; RV64-NEXT:    ret
   %cmp = icmp ne i64 %a, %b
   %v = select i1 %cmp, <vscale x 1 x i64> %c, <vscale x 1 x i64> %d
@@ -1078,41 +772,14 @@ define <vscale x 1 x i64> @selectcc_nxv1i64(i64 signext %a, i64 signext %b, <vsc
 }
 
 define <vscale x 2 x i64> @select_nxv2i64(i1 zeroext %c, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-; RV32-LABEL: select_nxv2i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    addi a1, zero, -1
-; RV32-NEXT:    bnez a0, .LBB52_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a1, zero
-; RV32-NEXT:  .LBB52_2:
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v26, (a0), zero
-; RV32-NEXT:    vand.vv v28, v8, v26
-; RV32-NEXT:    vxor.vi v26, v26, -1
-; RV32-NEXT:    vand.vv v26, v10, v26
-; RV32-NEXT:    vor.vv v8, v28, v26
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: select_nxv2i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi a1, zero, -1
-; RV64-NEXT:    bnez a0, .LBB52_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a1, zero
-; RV64-NEXT:  .LBB52_2:
-; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; RV64-NEXT:    vand.vx v26, v8, a1
-; RV64-NEXT:    vmv.v.x v28, a1
-; RV64-NEXT:    vxor.vi v28, v28, -1
-; RV64-NEXT:    vand.vv v28, v10, v28
-; RV64-NEXT:    vor.vv v8, v26, v28
-; RV64-NEXT:    ret
+; CHECK-LABEL: select_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b
   ret <vscale x 2 x i64> %v
 }
@@ -1120,41 +787,26 @@ define <vscale x 2 x i64> @select_nxv2i64(i1 zeroext %c, <vscale x 2 x i64> %a,
 define <vscale x 2 x i64> @selectcc_nxv2i64(i64 signext %a, i64 signext %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d) {
 ; RV32-LABEL: selectcc_nxv2i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    xor a1, a1, a3
 ; RV32-NEXT:    xor a0, a0, a2
-; RV32-NEXT:    or a1, a0, a1
-; RV32-NEXT:    addi a0, zero, -1
-; RV32-NEXT:    bnez a1, .LBB53_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a0, zero
-; RV32-NEXT:  .LBB53_2:
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v26, (a0), zero
-; RV32-NEXT:    vand.vv v28, v8, v26
-; RV32-NEXT:    vxor.vi v26, v26, -1
-; RV32-NEXT:    vand.vv v26, v10, v26
-; RV32-NEXT:    vor.vv v8, v28, v26
-; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    snez a0, a0
+; RV32-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
+; RV32-NEXT:    vmv.v.x v25, a0
+; RV32-NEXT:    vmsne.vi v0, v25, 0
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: selectcc_nxv2i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi a2, zero, -1
-; RV64-NEXT:    bne a0, a1, .LBB53_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a2, zero
-; RV64-NEXT:  .LBB53_2:
-; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; RV64-NEXT:    vand.vx v26, v8, a2
-; RV64-NEXT:    vmv.v.x v28, a2
-; RV64-NEXT:    vxor.vi v28, v28, -1
-; RV64-NEXT:    vand.vv v28, v10, v28
-; RV64-NEXT:    vor.vv v8, v26, v28
+; RV64-NEXT:    xor a0, a0, a1
+; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
+; RV64-NEXT:    vmv.v.x v25, a0
+; RV64-NEXT:    vmsne.vi v0, v25, 0
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; RV64-NEXT:    ret
   %cmp = icmp ne i64 %a, %b
   %v = select i1 %cmp, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d
@@ -1162,41 +814,14 @@ define <vscale x 2 x i64> @selectcc_nxv2i64(i64 signext %a, i64 signext %b, <vsc
 }
 
 define <vscale x 4 x i64> @select_nxv4i64(i1 zeroext %c, <vscale x 4 x i64> %a, <vscale x 4 x i64> %b) {
-; RV32-LABEL: select_nxv4i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    addi a1, zero, -1
-; RV32-NEXT:    bnez a0, .LBB54_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a1, zero
-; RV32-NEXT:  .LBB54_2:
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v28, (a0), zero
-; RV32-NEXT:    vand.vv v8, v8, v28
-; RV32-NEXT:    vxor.vi v28, v28, -1
-; RV32-NEXT:    vand.vv v28, v12, v28
-; RV32-NEXT:    vor.vv v8, v8, v28
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: select_nxv4i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi a1, zero, -1
-; RV64-NEXT:    bnez a0, .LBB54_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a1, zero
-; RV64-NEXT:  .LBB54_2:
-; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; RV64-NEXT:    vand.vx v28, v8, a1
-; RV64-NEXT:    vmv.v.x v8, a1
-; RV64-NEXT:    vxor.vi v8, v8, -1
-; RV64-NEXT:    vand.vv v8, v12, v8
-; RV64-NEXT:    vor.vv v8, v28, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: select_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 4 x i64> %a, <vscale x 4 x i64> %b
   ret <vscale x 4 x i64> %v
 }
@@ -1204,41 +829,26 @@ define <vscale x 4 x i64> @select_nxv4i64(i1 zeroext %c, <vscale x 4 x i64> %a,
 define <vscale x 4 x i64> @selectcc_nxv4i64(i64 signext %a, i64 signext %b, <vscale x 4 x i64> %c, <vscale x 4 x i64> %d) {
 ; RV32-LABEL: selectcc_nxv4i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    xor a1, a1, a3
 ; RV32-NEXT:    xor a0, a0, a2
-; RV32-NEXT:    or a1, a0, a1
-; RV32-NEXT:    addi a0, zero, -1
-; RV32-NEXT:    bnez a1, .LBB55_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a0, zero
-; RV32-NEXT:  .LBB55_2:
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v28, (a0), zero
-; RV32-NEXT:    vand.vv v8, v8, v28
-; RV32-NEXT:    vxor.vi v28, v28, -1
-; RV32-NEXT:    vand.vv v28, v12, v28
-; RV32-NEXT:    vor.vv v8, v8, v28
-; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    snez a0, a0
+; RV32-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
+; RV32-NEXT:    vmv.v.x v25, a0
+; RV32-NEXT:    vmsne.vi v0, v25, 0
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: selectcc_nxv4i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi a2, zero, -1
-; RV64-NEXT:    bne a0, a1, .LBB55_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a2, zero
-; RV64-NEXT:  .LBB55_2:
-; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; RV64-NEXT:    vand.vx v28, v8, a2
-; RV64-NEXT:    vmv.v.x v8, a2
-; RV64-NEXT:    vxor.vi v8, v8, -1
-; RV64-NEXT:    vand.vv v8, v12, v8
-; RV64-NEXT:    vor.vv v8, v28, v8
+; RV64-NEXT:    xor a0, a0, a1
+; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
+; RV64-NEXT:    vmv.v.x v25, a0
+; RV64-NEXT:    vmsne.vi v0, v25, 0
+; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; RV64-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; RV64-NEXT:    ret
   %cmp = icmp ne i64 %a, %b
   %v = select i1 %cmp, <vscale x 4 x i64> %c, <vscale x 4 x i64> %d
@@ -1246,41 +856,14 @@ define <vscale x 4 x i64> @selectcc_nxv4i64(i64 signext %a, i64 signext %b, <vsc
 }
 
 define <vscale x 8 x i64> @select_nxv8i64(i1 zeroext %c, <vscale x 8 x i64> %a, <vscale x 8 x i64> %b) {
-; RV32-LABEL: select_nxv8i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    addi a1, zero, -1
-; RV32-NEXT:    bnez a0, .LBB56_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a1, zero
-; RV32-NEXT:  .LBB56_2:
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vand.vv v8, v8, v24
-; RV32-NEXT:    vxor.vi v24, v24, -1
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: select_nxv8i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi a1, zero, -1
-; RV64-NEXT:    bnez a0, .LBB56_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a1, zero
-; RV64-NEXT:  .LBB56_2:
-; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    vmv.v.x v24, a1
-; RV64-NEXT:    vxor.vi v24, v24, -1
-; RV64-NEXT:    vand.vv v16, v16, v24
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    ret
+; CHECK-LABEL: select_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 8 x i64> %a, <vscale x 8 x i64> %b
   ret <vscale x 8 x i64> %v
 }
@@ -1288,41 +871,26 @@ define <vscale x 8 x i64> @select_nxv8i64(i1 zeroext %c, <vscale x 8 x i64> %a,
 define <vscale x 8 x i64> @selectcc_nxv8i64(i64 signext %a, i64 signext %b, <vscale x 8 x i64> %c, <vscale x 8 x i64> %d) {
 ; RV32-LABEL: selectcc_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    xor a1, a1, a3
 ; RV32-NEXT:    xor a0, a0, a2
-; RV32-NEXT:    or a1, a0, a1
-; RV32-NEXT:    addi a0, zero, -1
-; RV32-NEXT:    bnez a1, .LBB57_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a0, zero
-; RV32-NEXT:  .LBB57_2:
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vand.vv v8, v8, v24
-; RV32-NEXT:    vxor.vi v24, v24, -1
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    snez a0, a0
+; RV32-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; RV32-NEXT:    vmv.v.x v25, a0
+; RV32-NEXT:    vmsne.vi v0, v25, 0
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: selectcc_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi a2, zero, -1
-; RV64-NEXT:    bne a0, a1, .LBB57_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a2, zero
-; RV64-NEXT:  .LBB57_2:
-; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; RV64-NEXT:    vand.vx v8, v8, a2
-; RV64-NEXT:    vmv.v.x v24, a2
-; RV64-NEXT:    vxor.vi v24, v24, -1
-; RV64-NEXT:    vand.vv v16, v16, v24
-; RV64-NEXT:    vor.vv v8, v8, v16
+; RV64-NEXT:    xor a0, a0, a1
+; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; RV64-NEXT:    vmv.v.x v25, a0
+; RV64-NEXT:    vmsne.vi v0, v25, 0
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; RV64-NEXT:    ret
   %cmp = icmp ne i64 %a, %b
   %v = select i1 %cmp, <vscale x 8 x i64> %c, <vscale x 8 x i64> %d


        

