[llvm] 6db0ced - [LegalizeVectorOps][RISCV] Add scalable-vector SELECT expansion
Fraser Cormack via llvm-commits
llvm-commits at lists.llvm.org
Mon May 10 00:30:19 PDT 2021
Author: Fraser Cormack
Date: 2021-05-10T08:22:35+01:00
New Revision: 6db0cedd238590023398bb20dad94773b56c4c74
URL: https://github.com/llvm/llvm-project/commit/6db0cedd238590023398bb20dad94773b56c4c74
DIFF: https://github.com/llvm/llvm-project/commit/6db0cedd238590023398bb20dad94773b56c4c74.diff
LOG: [LegalizeVectorOps][RISCV] Add scalable-vector SELECT expansion
This patch extends VectorLegalizer::ExpandSELECT so that expansion is also
permitted for scalable vector types. The only real change is conditionally
checking for BUILD_VECTOR or SPLAT_VECTOR legality, depending on whether the
vector type is fixed-length or scalable.
We can use this to fix "cannot select" errors for scalable-vector selects on
the RISCV target. Note that future patches may have RISCV custom-lower vector
SELECTs to VSELECTs for branchless codegen.
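For illustration, the kind of IR this now legalizes is a select of two whole
scalable vectors under a scalar condition, along the lines of the following
minimal sketch (the function name and element type are illustrative and not
copied verbatim from the new tests listed below):

define <vscale x 2 x i32> @select_nxv2i32(i1 zeroext %c, <vscale x 2 x i32> %a,
                                          <vscale x 2 x i32> %b) {
  ; Previously hit "cannot select" on RISCV; now expanded by ExpandSELECT.
  %v = select i1 %c, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b
  ret <vscale x 2 x i32> %v
}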
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D102063
Added:
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll
llvm/test/CodeGen/RISCV/rvv/select-fp.ll
llvm/test/CodeGen/RISCV/rvv/select-int.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 8dd8da3527344..581708112e896 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -924,11 +924,16 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
// AND,OR,XOR, we will have to scalarize the op.
// Notice that the operation may be 'promoted' which means that it is
// 'bitcasted' to another type which is handled.
- // Also, we need to be able to construct a splat vector using BUILD_VECTOR.
+ // Also, we need to be able to construct a splat vector using either
+ // BUILD_VECTOR or SPLAT_VECTOR.
+ // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to
+ // BUILD_VECTOR?
if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
- TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
- TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand)
+ TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(VT.isFixedLengthVector() ? ISD::BUILD_VECTOR
+ : ISD::SPLAT_VECTOR,
+ VT) == TargetLowering::Expand)
return DAG.UnrollVectorOp(Node);
// Generate a mask operand.
@@ -942,8 +947,11 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
BitTy),
DAG.getConstant(0, DL, BitTy));
- // Broadcast the mask so that the entire vector is all-one or all zero.
- Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask);
+ // Broadcast the mask so that the entire vector is all one or all zero.
+ if (VT.isFixedLengthVector())
+ Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask);
+ else
+ Mask = DAG.getSplatVector(MaskTy, DL, Mask);
// Bitcast the operands to be the same type as the mask.
// This is needed when we select between FP types because
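For reference, the blend that ExpandSELECT builds can be sketched at the IR
level roughly as follows (the real transform operates on SelectionDAG nodes
and broadcasts the mask with SPLAT_VECTOR for scalable types; the
insertelement/shufflevector splat idiom and all names below are illustrative):

define <vscale x 2 x i32> @expanded_select_sketch(i1 %c, <vscale x 2 x i32> %a,
                                                  <vscale x 2 x i32> %b) {
  ; Sign-extend the condition to an element-sized all-ones/all-zeros value.
  %bit  = sext i1 %c to i32
  ; Broadcast it across the scalable vector (SPLAT_VECTOR in the DAG).
  %ins  = insertelement <vscale x 2 x i32> undef, i32 %bit, i32 0
  %mask = shufflevector <vscale x 2 x i32> %ins, <vscale x 2 x i32> undef,
                        <vscale x 2 x i32> zeroinitializer
  ; Invert the mask: ~mask = mask xor all-ones.
  %ones.ins = insertelement <vscale x 2 x i32> undef, i32 -1, i32 0
  %ones = shufflevector <vscale x 2 x i32> %ones.ins, <vscale x 2 x i32> undef,
                        <vscale x 2 x i32> zeroinitializer
  %nmask = xor <vscale x 2 x i32> %mask, %ones
  ; result = (a & mask) | (b & ~mask)
  %lhs = and <vscale x 2 x i32> %a, %mask
  %rhs = and <vscale x 2 x i32> %b, %nmask
  %v   = or <vscale x 2 x i32> %lhs, %rhs
  ret <vscale x 2 x i32> %v
}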
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7573de05dd978..9f2e5a019b4bb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -442,6 +442,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
@@ -517,6 +520,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+
setOperationAction(ISD::STEP_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
@@ -571,6 +577,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
@@ -695,6 +704,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
@@ -762,6 +773,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setCondCodeAction(CC, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::BITCAST, VT, Custom);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
new file mode 100644
index 0000000000000..2f502f59c79dc
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
@@ -0,0 +1,3752 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+define <2 x half> @select_v2f16(i1 zeroext %c, <2 x half> %a, <2 x half> %b) {
+; CHECK-LABEL: select_v2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT: vfmv.f.s ft1, v9
+; CHECK-NEXT: vfmv.f.s ft0, v8
+; CHECK-NEXT: vslidedown.vi v25, v9, 1
+; CHECK-NEXT: vfmv.f.s ft3, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 1
+; CHECK-NEXT: vfmv.f.s ft2, v25
+; CHECK-NEXT: bnez a0, .LBB0_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: fmv.h ft0, ft1
+; CHECK-NEXT: fmv.h ft2, ft3
+; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; CHECK-NEXT: vfmv.v.f v8, ft2
+; CHECK-NEXT: vfmv.s.f v8, ft0
+; CHECK-NEXT: ret
+ %v = select i1 %c, <2 x half> %a, <2 x half> %b
+ ret <2 x half> %v
+}
+
+define <2 x half> @selectcc_v2f16(half %a, half %b, <2 x half> %c, <2 x half> %d) {
+; CHECK-LABEL: selectcc_v2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: feq.h a0, fa0, fa1
+; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 1
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 1
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: bnez a0, .LBB1_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: fmv.h ft0, ft1
+; CHECK-NEXT: .LBB1_2:
+; CHECK-NEXT: vsetivli a1, 2, e16,m1,ta,mu
+; CHECK-NEXT: vfmv.v.f v25, ft0
+; CHECK-NEXT: vfmv.f.s ft1, v9
+; CHECK-NEXT: vfmv.f.s ft0, v8
+; CHECK-NEXT: bnez a0, .LBB1_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: fmv.h ft0, ft1
+; CHECK-NEXT: .LBB1_4:
+; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; CHECK-NEXT: vfmv.s.f v25, ft0
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq half %a, %b
+ %v = select i1 %cmp, <2 x half> %c, <2 x half> %d
+ ret <2 x half> %v
+}
+
+define <4 x half> @select_v4f16(i1 zeroext %c, <4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: select_v4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT: vfmv.f.s ft1, v9
+; CHECK-NEXT: vfmv.f.s ft0, v8
+; CHECK-NEXT: bnez a0, .LBB2_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: fmv.h ft0, ft1
+; CHECK-NEXT: .LBB2_2:
+; CHECK-NEXT: fsh ft0, 8(sp)
+; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 3
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 3
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB2_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: fmv.h ft1, ft0
+; CHECK-NEXT: .LBB2_4:
+; CHECK-NEXT: fsh ft1, 14(sp)
+; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 2
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 2
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB2_6
+; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: fmv.h ft1, ft0
+; CHECK-NEXT: .LBB2_6:
+; CHECK-NEXT: fsh ft1, 12(sp)
+; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 1
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 1
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB2_8
+; CHECK-NEXT: # %bb.7:
+; CHECK-NEXT: fmv.h ft1, ft0
+; CHECK-NEXT: .LBB2_8:
+; CHECK-NEXT: fsh ft1, 10(sp)
+; CHECK-NEXT: vsetivli a0, 4, e16,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = select i1 %c, <4 x half> %a, <4 x half> %b
+ ret <4 x half> %v
+}
+
+define <4 x half> @selectcc_v4f16(half %a, half %b, <4 x half> %c, <4 x half> %d) {
+; CHECK-LABEL: selectcc_v4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: feq.h a0, fa0, fa1
+; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT: vfmv.f.s ft1, v9
+; CHECK-NEXT: vfmv.f.s ft0, v8
+; CHECK-NEXT: bnez a0, .LBB3_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: fmv.h ft0, ft1
+; CHECK-NEXT: .LBB3_2:
+; CHECK-NEXT: fsh ft0, 8(sp)
+; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 3
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 3
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB3_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: fmv.h ft1, ft0
+; CHECK-NEXT: .LBB3_4:
+; CHECK-NEXT: fsh ft1, 14(sp)
+; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 2
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 2
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB3_6
+; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: fmv.h ft1, ft0
+; CHECK-NEXT: .LBB3_6:
+; CHECK-NEXT: fsh ft1, 12(sp)
+; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 1
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 1
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB3_8
+; CHECK-NEXT: # %bb.7:
+; CHECK-NEXT: fmv.h ft1, ft0
+; CHECK-NEXT: .LBB3_8:
+; CHECK-NEXT: fsh ft1, 10(sp)
+; CHECK-NEXT: vsetivli a0, 4, e16,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq half %a, %b
+ %v = select i1 %cmp, <4 x half> %c, <4 x half> %d
+ ret <4 x half> %v
+}
+
+define <8 x half> @select_v8f16(i1 zeroext %c, <8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: select_v8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT: vfmv.f.s ft1, v9
+; CHECK-NEXT: vfmv.f.s ft0, v8
+; CHECK-NEXT: bnez a0, .LBB4_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: fmv.h ft0, ft1
+; CHECK-NEXT: .LBB4_2:
+; CHECK-NEXT: fsh ft0, 0(sp)
+; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 7
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 7
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB4_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: fmv.h ft1, ft0
+; CHECK-NEXT: .LBB4_4:
+; CHECK-NEXT: fsh ft1, 14(sp)
+; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 6
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 6
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB4_6
+; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: fmv.h ft1, ft0
+; CHECK-NEXT: .LBB4_6:
+; CHECK-NEXT: fsh ft1, 12(sp)
+; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 5
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 5
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB4_8
+; CHECK-NEXT: # %bb.7:
+; CHECK-NEXT: fmv.h ft1, ft0
+; CHECK-NEXT: .LBB4_8:
+; CHECK-NEXT: fsh ft1, 10(sp)
+; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 4
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 4
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB4_10
+; CHECK-NEXT: # %bb.9:
+; CHECK-NEXT: fmv.h ft1, ft0
+; CHECK-NEXT: .LBB4_10:
+; CHECK-NEXT: fsh ft1, 8(sp)
+; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 3
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 3
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB4_12
+; CHECK-NEXT: # %bb.11:
+; CHECK-NEXT: fmv.h ft1, ft0
+; CHECK-NEXT: .LBB4_12:
+; CHECK-NEXT: fsh ft1, 6(sp)
+; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 2
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 2
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB4_14
+; CHECK-NEXT: # %bb.13:
+; CHECK-NEXT: fmv.h ft1, ft0
+; CHECK-NEXT: .LBB4_14:
+; CHECK-NEXT: fsh ft1, 4(sp)
+; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 1
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 1
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB4_16
+; CHECK-NEXT: # %bb.15:
+; CHECK-NEXT: fmv.h ft1, ft0
+; CHECK-NEXT: .LBB4_16:
+; CHECK-NEXT: fsh ft1, 2(sp)
+; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v8, (sp)
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = select i1 %c, <8 x half> %a, <8 x half> %b
+ ret <8 x half> %v
+}
+
+define <8 x half> @selectcc_v8f16(half %a, half %b, <8 x half> %c, <8 x half> %d) {
+; CHECK-LABEL: selectcc_v8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: feq.h a0, fa0, fa1
+; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT: vfmv.f.s ft1, v9
+; CHECK-NEXT: vfmv.f.s ft0, v8
+; CHECK-NEXT: bnez a0, .LBB5_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: fmv.h ft0, ft1
+; CHECK-NEXT: .LBB5_2:
+; CHECK-NEXT: fsh ft0, 0(sp)
+; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 7
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 7
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB5_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: fmv.h ft1, ft0
+; CHECK-NEXT: .LBB5_4:
+; CHECK-NEXT: fsh ft1, 14(sp)
+; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 6
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 6
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB5_6
+; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: fmv.h ft1, ft0
+; CHECK-NEXT: .LBB5_6:
+; CHECK-NEXT: fsh ft1, 12(sp)
+; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 5
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 5
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB5_8
+; CHECK-NEXT: # %bb.7:
+; CHECK-NEXT: fmv.h ft1, ft0
+; CHECK-NEXT: .LBB5_8:
+; CHECK-NEXT: fsh ft1, 10(sp)
+; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 4
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 4
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB5_10
+; CHECK-NEXT: # %bb.9:
+; CHECK-NEXT: fmv.h ft1, ft0
+; CHECK-NEXT: .LBB5_10:
+; CHECK-NEXT: fsh ft1, 8(sp)
+; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 3
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 3
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB5_12
+; CHECK-NEXT: # %bb.11:
+; CHECK-NEXT: fmv.h ft1, ft0
+; CHECK-NEXT: .LBB5_12:
+; CHECK-NEXT: fsh ft1, 6(sp)
+; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 2
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 2
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB5_14
+; CHECK-NEXT: # %bb.13:
+; CHECK-NEXT: fmv.h ft1, ft0
+; CHECK-NEXT: .LBB5_14:
+; CHECK-NEXT: fsh ft1, 4(sp)
+; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 1
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 1
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB5_16
+; CHECK-NEXT: # %bb.15:
+; CHECK-NEXT: fmv.h ft1, ft0
+; CHECK-NEXT: .LBB5_16:
+; CHECK-NEXT: fsh ft1, 2(sp)
+; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v8, (sp)
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq half %a, %b
+ %v = select i1 %cmp, <8 x half> %c, <8 x half> %d
+ ret <8 x half> %v
+}
+
+define <16 x half> @select_v16f16(i1 zeroext %c, <16 x half> %a, <16 x half> %b) {
+; RV32-LABEL: select_v16f16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -64
+; RV32-NEXT: .cfi_def_cfa_offset 64
+; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 64
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: andi sp, sp, -32
+; RV32-NEXT: vsetvli zero, zero, e16,m2,ta,mu
+; RV32-NEXT: vfmv.f.s ft1, v10
+; RV32-NEXT: vfmv.f.s ft0, v8
+; RV32-NEXT: bnez a0, .LBB6_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: fmv.h ft0, ft1
+; RV32-NEXT: .LBB6_2:
+; RV32-NEXT: fsh ft0, 0(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 15
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 15
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB6_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB6_4:
+; RV32-NEXT: fsh ft1, 30(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 14
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 14
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB6_6
+; RV32-NEXT: # %bb.5:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB6_6:
+; RV32-NEXT: fsh ft1, 28(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 13
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 13
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB6_8
+; RV32-NEXT: # %bb.7:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB6_8:
+; RV32-NEXT: fsh ft1, 26(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 12
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 12
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB6_10
+; RV32-NEXT: # %bb.9:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB6_10:
+; RV32-NEXT: fsh ft1, 24(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 11
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 11
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB6_12
+; RV32-NEXT: # %bb.11:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB6_12:
+; RV32-NEXT: fsh ft1, 22(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 10
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 10
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB6_14
+; RV32-NEXT: # %bb.13:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB6_14:
+; RV32-NEXT: fsh ft1, 20(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 9
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 9
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB6_16
+; RV32-NEXT: # %bb.15:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB6_16:
+; RV32-NEXT: fsh ft1, 18(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 8
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 8
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB6_18
+; RV32-NEXT: # %bb.17:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB6_18:
+; RV32-NEXT: fsh ft1, 16(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 7
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 7
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB6_20
+; RV32-NEXT: # %bb.19:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB6_20:
+; RV32-NEXT: fsh ft1, 14(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 6
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 6
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB6_22
+; RV32-NEXT: # %bb.21:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB6_22:
+; RV32-NEXT: fsh ft1, 12(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 5
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 5
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB6_24
+; RV32-NEXT: # %bb.23:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB6_24:
+; RV32-NEXT: fsh ft1, 10(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 4
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 4
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB6_26
+; RV32-NEXT: # %bb.25:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB6_26:
+; RV32-NEXT: fsh ft1, 8(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 3
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 3
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB6_28
+; RV32-NEXT: # %bb.27:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB6_28:
+; RV32-NEXT: fsh ft1, 6(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 2
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 2
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB6_30
+; RV32-NEXT: # %bb.29:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB6_30:
+; RV32-NEXT: fsh ft1, 4(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 1
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 1
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB6_32
+; RV32-NEXT: # %bb.31:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB6_32:
+; RV32-NEXT: fsh ft1, 2(sp)
+; RV32-NEXT: vsetivli a0, 16, e16,m2,ta,mu
+; RV32-NEXT: vle16.v v8, (sp)
+; RV32-NEXT: addi sp, s0, -64
+; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 64
+; RV32-NEXT: ret
+;
+; RV64-LABEL: select_v16f16:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -64
+; RV64-NEXT: .cfi_def_cfa_offset 64
+; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 64
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: andi sp, sp, -32
+; RV64-NEXT: vsetvli zero, zero, e16,m2,ta,mu
+; RV64-NEXT: vfmv.f.s ft1, v10
+; RV64-NEXT: vfmv.f.s ft0, v8
+; RV64-NEXT: bnez a0, .LBB6_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: fmv.h ft0, ft1
+; RV64-NEXT: .LBB6_2:
+; RV64-NEXT: fsh ft0, 0(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 15
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 15
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB6_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB6_4:
+; RV64-NEXT: fsh ft1, 30(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 14
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 14
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB6_6
+; RV64-NEXT: # %bb.5:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB6_6:
+; RV64-NEXT: fsh ft1, 28(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 13
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 13
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB6_8
+; RV64-NEXT: # %bb.7:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB6_8:
+; RV64-NEXT: fsh ft1, 26(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 12
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 12
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB6_10
+; RV64-NEXT: # %bb.9:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB6_10:
+; RV64-NEXT: fsh ft1, 24(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 11
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 11
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB6_12
+; RV64-NEXT: # %bb.11:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB6_12:
+; RV64-NEXT: fsh ft1, 22(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 10
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 10
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB6_14
+; RV64-NEXT: # %bb.13:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB6_14:
+; RV64-NEXT: fsh ft1, 20(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 9
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 9
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB6_16
+; RV64-NEXT: # %bb.15:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB6_16:
+; RV64-NEXT: fsh ft1, 18(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 8
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 8
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB6_18
+; RV64-NEXT: # %bb.17:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB6_18:
+; RV64-NEXT: fsh ft1, 16(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 7
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 7
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB6_20
+; RV64-NEXT: # %bb.19:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB6_20:
+; RV64-NEXT: fsh ft1, 14(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 6
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 6
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB6_22
+; RV64-NEXT: # %bb.21:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB6_22:
+; RV64-NEXT: fsh ft1, 12(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 5
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 5
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB6_24
+; RV64-NEXT: # %bb.23:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB6_24:
+; RV64-NEXT: fsh ft1, 10(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 4
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 4
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB6_26
+; RV64-NEXT: # %bb.25:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB6_26:
+; RV64-NEXT: fsh ft1, 8(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 3
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 3
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB6_28
+; RV64-NEXT: # %bb.27:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB6_28:
+; RV64-NEXT: fsh ft1, 6(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 2
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 2
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB6_30
+; RV64-NEXT: # %bb.29:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB6_30:
+; RV64-NEXT: fsh ft1, 4(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 1
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 1
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB6_32
+; RV64-NEXT: # %bb.31:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB6_32:
+; RV64-NEXT: fsh ft1, 2(sp)
+; RV64-NEXT: vsetivli a0, 16, e16,m2,ta,mu
+; RV64-NEXT: vle16.v v8, (sp)
+; RV64-NEXT: addi sp, s0, -64
+; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 64
+; RV64-NEXT: ret
+ %v = select i1 %c, <16 x half> %a, <16 x half> %b
+ ret <16 x half> %v
+}
+
+define <16 x half> @selectcc_v16f16(half %a, half %b, <16 x half> %c, <16 x half> %d) {
+; RV32-LABEL: selectcc_v16f16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -64
+; RV32-NEXT: .cfi_def_cfa_offset 64
+; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 64
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: andi sp, sp, -32
+; RV32-NEXT: feq.h a0, fa0, fa1
+; RV32-NEXT: vsetvli zero, zero, e16,m2,ta,mu
+; RV32-NEXT: vfmv.f.s ft1, v10
+; RV32-NEXT: vfmv.f.s ft0, v8
+; RV32-NEXT: bnez a0, .LBB7_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: fmv.h ft0, ft1
+; RV32-NEXT: .LBB7_2:
+; RV32-NEXT: fsh ft0, 0(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 15
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 15
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB7_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB7_4:
+; RV32-NEXT: fsh ft1, 30(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 14
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 14
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB7_6
+; RV32-NEXT: # %bb.5:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB7_6:
+; RV32-NEXT: fsh ft1, 28(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 13
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 13
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB7_8
+; RV32-NEXT: # %bb.7:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB7_8:
+; RV32-NEXT: fsh ft1, 26(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 12
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 12
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB7_10
+; RV32-NEXT: # %bb.9:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB7_10:
+; RV32-NEXT: fsh ft1, 24(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 11
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 11
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB7_12
+; RV32-NEXT: # %bb.11:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB7_12:
+; RV32-NEXT: fsh ft1, 22(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 10
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 10
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB7_14
+; RV32-NEXT: # %bb.13:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB7_14:
+; RV32-NEXT: fsh ft1, 20(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 9
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 9
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB7_16
+; RV32-NEXT: # %bb.15:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB7_16:
+; RV32-NEXT: fsh ft1, 18(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 8
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 8
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB7_18
+; RV32-NEXT: # %bb.17:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB7_18:
+; RV32-NEXT: fsh ft1, 16(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 7
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 7
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB7_20
+; RV32-NEXT: # %bb.19:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB7_20:
+; RV32-NEXT: fsh ft1, 14(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 6
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 6
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB7_22
+; RV32-NEXT: # %bb.21:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB7_22:
+; RV32-NEXT: fsh ft1, 12(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 5
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 5
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB7_24
+; RV32-NEXT: # %bb.23:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB7_24:
+; RV32-NEXT: fsh ft1, 10(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 4
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 4
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB7_26
+; RV32-NEXT: # %bb.25:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB7_26:
+; RV32-NEXT: fsh ft1, 8(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 3
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 3
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB7_28
+; RV32-NEXT: # %bb.27:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB7_28:
+; RV32-NEXT: fsh ft1, 6(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 2
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 2
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB7_30
+; RV32-NEXT: # %bb.29:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB7_30:
+; RV32-NEXT: fsh ft1, 4(sp)
+; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 1
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 1
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB7_32
+; RV32-NEXT: # %bb.31:
+; RV32-NEXT: fmv.h ft1, ft0
+; RV32-NEXT: .LBB7_32:
+; RV32-NEXT: fsh ft1, 2(sp)
+; RV32-NEXT: vsetivli a0, 16, e16,m2,ta,mu
+; RV32-NEXT: vle16.v v8, (sp)
+; RV32-NEXT: addi sp, s0, -64
+; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 64
+; RV32-NEXT: ret
+;
+; RV64-LABEL: selectcc_v16f16:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -64
+; RV64-NEXT: .cfi_def_cfa_offset 64
+; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 64
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: andi sp, sp, -32
+; RV64-NEXT: feq.h a0, fa0, fa1
+; RV64-NEXT: vsetvli zero, zero, e16,m2,ta,mu
+; RV64-NEXT: vfmv.f.s ft1, v10
+; RV64-NEXT: vfmv.f.s ft0, v8
+; RV64-NEXT: bnez a0, .LBB7_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: fmv.h ft0, ft1
+; RV64-NEXT: .LBB7_2:
+; RV64-NEXT: fsh ft0, 0(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 15
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 15
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB7_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB7_4:
+; RV64-NEXT: fsh ft1, 30(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 14
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 14
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB7_6
+; RV64-NEXT: # %bb.5:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB7_6:
+; RV64-NEXT: fsh ft1, 28(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 13
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 13
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB7_8
+; RV64-NEXT: # %bb.7:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB7_8:
+; RV64-NEXT: fsh ft1, 26(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 12
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 12
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB7_10
+; RV64-NEXT: # %bb.9:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB7_10:
+; RV64-NEXT: fsh ft1, 24(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 11
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 11
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB7_12
+; RV64-NEXT: # %bb.11:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB7_12:
+; RV64-NEXT: fsh ft1, 22(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 10
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 10
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB7_14
+; RV64-NEXT: # %bb.13:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB7_14:
+; RV64-NEXT: fsh ft1, 20(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 9
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 9
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB7_16
+; RV64-NEXT: # %bb.15:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB7_16:
+; RV64-NEXT: fsh ft1, 18(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 8
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 8
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB7_18
+; RV64-NEXT: # %bb.17:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB7_18:
+; RV64-NEXT: fsh ft1, 16(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 7
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 7
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB7_20
+; RV64-NEXT: # %bb.19:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB7_20:
+; RV64-NEXT: fsh ft1, 14(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 6
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 6
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB7_22
+; RV64-NEXT: # %bb.21:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB7_22:
+; RV64-NEXT: fsh ft1, 12(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 5
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 5
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB7_24
+; RV64-NEXT: # %bb.23:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB7_24:
+; RV64-NEXT: fsh ft1, 10(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 4
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 4
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB7_26
+; RV64-NEXT: # %bb.25:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB7_26:
+; RV64-NEXT: fsh ft1, 8(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 3
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 3
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB7_28
+; RV64-NEXT: # %bb.27:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB7_28:
+; RV64-NEXT: fsh ft1, 6(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 2
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 2
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB7_30
+; RV64-NEXT: # %bb.29:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB7_30:
+; RV64-NEXT: fsh ft1, 4(sp)
+; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 1
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 1
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB7_32
+; RV64-NEXT: # %bb.31:
+; RV64-NEXT: fmv.h ft1, ft0
+; RV64-NEXT: .LBB7_32:
+; RV64-NEXT: fsh ft1, 2(sp)
+; RV64-NEXT: vsetivli a0, 16, e16,m2,ta,mu
+; RV64-NEXT: vle16.v v8, (sp)
+; RV64-NEXT: addi sp, s0, -64
+; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 64
+; RV64-NEXT: ret
+ %cmp = fcmp oeq half %a, %b
+ %v = select i1 %cmp, <16 x half> %c, <16 x half> %d
+ ret <16 x half> %v
+}
+
+define <2 x float> @select_v2f32(i1 zeroext %c, <2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: select_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT: vfmv.f.s ft1, v9
+; CHECK-NEXT: vfmv.f.s ft0, v8
+; CHECK-NEXT: vslidedown.vi v25, v9, 1
+; CHECK-NEXT: vfmv.f.s ft3, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 1
+; CHECK-NEXT: vfmv.f.s ft2, v25
+; CHECK-NEXT: bnez a0, .LBB8_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: fmv.s ft0, ft1
+; CHECK-NEXT: fmv.s ft2, ft3
+; CHECK-NEXT: .LBB8_2:
+; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; CHECK-NEXT: vfmv.v.f v8, ft2
+; CHECK-NEXT: vfmv.s.f v8, ft0
+; CHECK-NEXT: ret
+ %v = select i1 %c, <2 x float> %a, <2 x float> %b
+ ret <2 x float> %v
+}
+
+define <2 x float> @selectcc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d) {
+; CHECK-LABEL: selectcc_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: feq.s a0, fa0, fa1
+; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 1
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 1
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: bnez a0, .LBB9_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: fmv.s ft0, ft1
+; CHECK-NEXT: .LBB9_2:
+; CHECK-NEXT: vsetivli a1, 2, e32,m1,ta,mu
+; CHECK-NEXT: vfmv.v.f v25, ft0
+; CHECK-NEXT: vfmv.f.s ft1, v9
+; CHECK-NEXT: vfmv.f.s ft0, v8
+; CHECK-NEXT: bnez a0, .LBB9_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: fmv.s ft0, ft1
+; CHECK-NEXT: .LBB9_4:
+; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; CHECK-NEXT: vfmv.s.f v25, ft0
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq float %a, %b
+ %v = select i1 %cmp, <2 x float> %c, <2 x float> %d
+ ret <2 x float> %v
+}
+
+define <4 x float> @select_v4f32(i1 zeroext %c, <4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: select_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT: vfmv.f.s ft1, v9
+; CHECK-NEXT: vfmv.f.s ft0, v8
+; CHECK-NEXT: bnez a0, .LBB10_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: fmv.s ft0, ft1
+; CHECK-NEXT: .LBB10_2:
+; CHECK-NEXT: fsw ft0, 0(sp)
+; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 3
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 3
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB10_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: fmv.s ft1, ft0
+; CHECK-NEXT: .LBB10_4:
+; CHECK-NEXT: fsw ft1, 12(sp)
+; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 2
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 2
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB10_6
+; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: fmv.s ft1, ft0
+; CHECK-NEXT: .LBB10_6:
+; CHECK-NEXT: fsw ft1, 8(sp)
+; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 1
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 1
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB10_8
+; CHECK-NEXT: # %bb.7:
+; CHECK-NEXT: fmv.s ft1, ft0
+; CHECK-NEXT: .LBB10_8:
+; CHECK-NEXT: fsw ft1, 4(sp)
+; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v8, (sp)
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = select i1 %c, <4 x float> %a, <4 x float> %b
+ ret <4 x float> %v
+}
+
+define <4 x float> @selectcc_v4f32(float %a, float %b, <4 x float> %c, <4 x float> %d) {
+; CHECK-LABEL: selectcc_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: feq.s a0, fa0, fa1
+; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT: vfmv.f.s ft1, v9
+; CHECK-NEXT: vfmv.f.s ft0, v8
+; CHECK-NEXT: bnez a0, .LBB11_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: fmv.s ft0, ft1
+; CHECK-NEXT: .LBB11_2:
+; CHECK-NEXT: fsw ft0, 0(sp)
+; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 3
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 3
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB11_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: fmv.s ft1, ft0
+; CHECK-NEXT: .LBB11_4:
+; CHECK-NEXT: fsw ft1, 12(sp)
+; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 2
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 2
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB11_6
+; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: fmv.s ft1, ft0
+; CHECK-NEXT: .LBB11_6:
+; CHECK-NEXT: fsw ft1, 8(sp)
+; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 1
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 1
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: bnez a0, .LBB11_8
+; CHECK-NEXT: # %bb.7:
+; CHECK-NEXT: fmv.s ft1, ft0
+; CHECK-NEXT: .LBB11_8:
+; CHECK-NEXT: fsw ft1, 4(sp)
+; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v8, (sp)
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq float %a, %b
+ %v = select i1 %cmp, <4 x float> %c, <4 x float> %d
+ ret <4 x float> %v
+}
+
+define <8 x float> @select_v8f32(i1 zeroext %c, <8 x float> %a, <8 x float> %b) {
+; RV32-LABEL: select_v8f32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -64
+; RV32-NEXT: .cfi_def_cfa_offset 64
+; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 64
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: andi sp, sp, -32
+; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu
+; RV32-NEXT: vfmv.f.s ft1, v10
+; RV32-NEXT: vfmv.f.s ft0, v8
+; RV32-NEXT: bnez a0, .LBB12_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: fmv.s ft0, ft1
+; RV32-NEXT: .LBB12_2:
+; RV32-NEXT: fsw ft0, 0(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 7
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 7
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB12_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB12_4:
+; RV32-NEXT: fsw ft1, 28(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 6
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 6
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB12_6
+; RV32-NEXT: # %bb.5:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB12_6:
+; RV32-NEXT: fsw ft1, 24(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 5
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 5
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB12_8
+; RV32-NEXT: # %bb.7:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB12_8:
+; RV32-NEXT: fsw ft1, 20(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 4
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 4
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB12_10
+; RV32-NEXT: # %bb.9:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB12_10:
+; RV32-NEXT: fsw ft1, 16(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 3
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 3
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB12_12
+; RV32-NEXT: # %bb.11:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB12_12:
+; RV32-NEXT: fsw ft1, 12(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 2
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 2
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB12_14
+; RV32-NEXT: # %bb.13:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB12_14:
+; RV32-NEXT: fsw ft1, 8(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 1
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 1
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB12_16
+; RV32-NEXT: # %bb.15:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB12_16:
+; RV32-NEXT: fsw ft1, 4(sp)
+; RV32-NEXT: vsetivli a0, 8, e32,m2,ta,mu
+; RV32-NEXT: vle32.v v8, (sp)
+; RV32-NEXT: addi sp, s0, -64
+; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 64
+; RV32-NEXT: ret
+;
+; RV64-LABEL: select_v8f32:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -64
+; RV64-NEXT: .cfi_def_cfa_offset 64
+; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 64
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: andi sp, sp, -32
+; RV64-NEXT: vsetvli zero, zero, e32,m2,ta,mu
+; RV64-NEXT: vfmv.f.s ft1, v10
+; RV64-NEXT: vfmv.f.s ft0, v8
+; RV64-NEXT: bnez a0, .LBB12_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: fmv.s ft0, ft1
+; RV64-NEXT: .LBB12_2:
+; RV64-NEXT: fsw ft0, 0(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 7
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 7
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB12_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB12_4:
+; RV64-NEXT: fsw ft1, 28(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 6
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 6
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB12_6
+; RV64-NEXT: # %bb.5:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB12_6:
+; RV64-NEXT: fsw ft1, 24(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 5
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 5
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB12_8
+; RV64-NEXT: # %bb.7:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB12_8:
+; RV64-NEXT: fsw ft1, 20(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 4
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 4
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB12_10
+; RV64-NEXT: # %bb.9:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB12_10:
+; RV64-NEXT: fsw ft1, 16(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 3
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 3
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB12_12
+; RV64-NEXT: # %bb.11:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB12_12:
+; RV64-NEXT: fsw ft1, 12(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 2
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 2
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB12_14
+; RV64-NEXT: # %bb.13:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB12_14:
+; RV64-NEXT: fsw ft1, 8(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 1
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 1
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB12_16
+; RV64-NEXT: # %bb.15:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB12_16:
+; RV64-NEXT: fsw ft1, 4(sp)
+; RV64-NEXT: vsetivli a0, 8, e32,m2,ta,mu
+; RV64-NEXT: vle32.v v8, (sp)
+; RV64-NEXT: addi sp, s0, -64
+; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 64
+; RV64-NEXT: ret
+ %v = select i1 %c, <8 x float> %a, <8 x float> %b
+ ret <8 x float> %v
+}
+
+define <8 x float> @selectcc_v8f32(float %a, float %b, <8 x float> %c, <8 x float> %d) {
+; RV32-LABEL: selectcc_v8f32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -64
+; RV32-NEXT: .cfi_def_cfa_offset 64
+; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 64
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: andi sp, sp, -32
+; RV32-NEXT: feq.s a0, fa0, fa1
+; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu
+; RV32-NEXT: vfmv.f.s ft1, v10
+; RV32-NEXT: vfmv.f.s ft0, v8
+; RV32-NEXT: bnez a0, .LBB13_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: fmv.s ft0, ft1
+; RV32-NEXT: .LBB13_2:
+; RV32-NEXT: fsw ft0, 0(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 7
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 7
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB13_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB13_4:
+; RV32-NEXT: fsw ft1, 28(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 6
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 6
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB13_6
+; RV32-NEXT: # %bb.5:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB13_6:
+; RV32-NEXT: fsw ft1, 24(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 5
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 5
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB13_8
+; RV32-NEXT: # %bb.7:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB13_8:
+; RV32-NEXT: fsw ft1, 20(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 4
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 4
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB13_10
+; RV32-NEXT: # %bb.9:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB13_10:
+; RV32-NEXT: fsw ft1, 16(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 3
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 3
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB13_12
+; RV32-NEXT: # %bb.11:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB13_12:
+; RV32-NEXT: fsw ft1, 12(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 2
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 2
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB13_14
+; RV32-NEXT: # %bb.13:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB13_14:
+; RV32-NEXT: fsw ft1, 8(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 1
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 1
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB13_16
+; RV32-NEXT: # %bb.15:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB13_16:
+; RV32-NEXT: fsw ft1, 4(sp)
+; RV32-NEXT: vsetivli a0, 8, e32,m2,ta,mu
+; RV32-NEXT: vle32.v v8, (sp)
+; RV32-NEXT: addi sp, s0, -64
+; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 64
+; RV32-NEXT: ret
+;
+; RV64-LABEL: selectcc_v8f32:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -64
+; RV64-NEXT: .cfi_def_cfa_offset 64
+; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 64
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: andi sp, sp, -32
+; RV64-NEXT: feq.s a0, fa0, fa1
+; RV64-NEXT: vsetvli zero, zero, e32,m2,ta,mu
+; RV64-NEXT: vfmv.f.s ft1, v10
+; RV64-NEXT: vfmv.f.s ft0, v8
+; RV64-NEXT: bnez a0, .LBB13_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: fmv.s ft0, ft1
+; RV64-NEXT: .LBB13_2:
+; RV64-NEXT: fsw ft0, 0(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 7
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 7
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB13_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB13_4:
+; RV64-NEXT: fsw ft1, 28(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 6
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 6
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB13_6
+; RV64-NEXT: # %bb.5:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB13_6:
+; RV64-NEXT: fsw ft1, 24(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 5
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 5
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB13_8
+; RV64-NEXT: # %bb.7:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB13_8:
+; RV64-NEXT: fsw ft1, 20(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 4
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 4
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB13_10
+; RV64-NEXT: # %bb.9:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB13_10:
+; RV64-NEXT: fsw ft1, 16(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 3
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 3
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB13_12
+; RV64-NEXT: # %bb.11:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB13_12:
+; RV64-NEXT: fsw ft1, 12(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 2
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 2
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB13_14
+; RV64-NEXT: # %bb.13:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB13_14:
+; RV64-NEXT: fsw ft1, 8(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 1
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 1
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB13_16
+; RV64-NEXT: # %bb.15:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB13_16:
+; RV64-NEXT: fsw ft1, 4(sp)
+; RV64-NEXT: vsetivli a0, 8, e32,m2,ta,mu
+; RV64-NEXT: vle32.v v8, (sp)
+; RV64-NEXT: addi sp, s0, -64
+; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 64
+; RV64-NEXT: ret
+ %cmp = fcmp oeq float %a, %b
+ %v = select i1 %cmp, <8 x float> %c, <8 x float> %d
+ ret <8 x float> %v
+}
+
+define <16 x float> @select_v16f32(i1 zeroext %c, <16 x float> %a, <16 x float> %b) {
+; RV32-LABEL: select_v16f32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -128
+; RV32-NEXT: .cfi_def_cfa_offset 128
+; RV32-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 128
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: andi sp, sp, -64
+; RV32-NEXT: vsetvli zero, zero, e32,m4,ta,mu
+; RV32-NEXT: vfmv.f.s ft1, v12
+; RV32-NEXT: vfmv.f.s ft0, v8
+; RV32-NEXT: bnez a0, .LBB14_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: fmv.s ft0, ft1
+; RV32-NEXT: .LBB14_2:
+; RV32-NEXT: fsw ft0, 0(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 15
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 15
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB14_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB14_4:
+; RV32-NEXT: fsw ft1, 60(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 14
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 14
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB14_6
+; RV32-NEXT: # %bb.5:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB14_6:
+; RV32-NEXT: fsw ft1, 56(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 13
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 13
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB14_8
+; RV32-NEXT: # %bb.7:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB14_8:
+; RV32-NEXT: fsw ft1, 52(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 12
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 12
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB14_10
+; RV32-NEXT: # %bb.9:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB14_10:
+; RV32-NEXT: fsw ft1, 48(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 11
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 11
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB14_12
+; RV32-NEXT: # %bb.11:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB14_12:
+; RV32-NEXT: fsw ft1, 44(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 10
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 10
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB14_14
+; RV32-NEXT: # %bb.13:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB14_14:
+; RV32-NEXT: fsw ft1, 40(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 9
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 9
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB14_16
+; RV32-NEXT: # %bb.15:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB14_16:
+; RV32-NEXT: fsw ft1, 36(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 8
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 8
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB14_18
+; RV32-NEXT: # %bb.17:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB14_18:
+; RV32-NEXT: fsw ft1, 32(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 7
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 7
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB14_20
+; RV32-NEXT: # %bb.19:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB14_20:
+; RV32-NEXT: fsw ft1, 28(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 6
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 6
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB14_22
+; RV32-NEXT: # %bb.21:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB14_22:
+; RV32-NEXT: fsw ft1, 24(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 5
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 5
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB14_24
+; RV32-NEXT: # %bb.23:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB14_24:
+; RV32-NEXT: fsw ft1, 20(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 4
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 4
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB14_26
+; RV32-NEXT: # %bb.25:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB14_26:
+; RV32-NEXT: fsw ft1, 16(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 3
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 3
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB14_28
+; RV32-NEXT: # %bb.27:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB14_28:
+; RV32-NEXT: fsw ft1, 12(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 2
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 2
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB14_30
+; RV32-NEXT: # %bb.29:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB14_30:
+; RV32-NEXT: fsw ft1, 8(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 1
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 1
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB14_32
+; RV32-NEXT: # %bb.31:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB14_32:
+; RV32-NEXT: fsw ft1, 4(sp)
+; RV32-NEXT: vsetivli a0, 16, e32,m4,ta,mu
+; RV32-NEXT: vle32.v v8, (sp)
+; RV32-NEXT: addi sp, s0, -128
+; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 128
+; RV32-NEXT: ret
+;
+; RV64-LABEL: select_v16f32:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -128
+; RV64-NEXT: .cfi_def_cfa_offset 128
+; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 128
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: andi sp, sp, -64
+; RV64-NEXT: vsetvli zero, zero, e32,m4,ta,mu
+; RV64-NEXT: vfmv.f.s ft1, v12
+; RV64-NEXT: vfmv.f.s ft0, v8
+; RV64-NEXT: bnez a0, .LBB14_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: fmv.s ft0, ft1
+; RV64-NEXT: .LBB14_2:
+; RV64-NEXT: fsw ft0, 0(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 15
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 15
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB14_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB14_4:
+; RV64-NEXT: fsw ft1, 60(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 14
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 14
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB14_6
+; RV64-NEXT: # %bb.5:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB14_6:
+; RV64-NEXT: fsw ft1, 56(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 13
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 13
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB14_8
+; RV64-NEXT: # %bb.7:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB14_8:
+; RV64-NEXT: fsw ft1, 52(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 12
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 12
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB14_10
+; RV64-NEXT: # %bb.9:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB14_10:
+; RV64-NEXT: fsw ft1, 48(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 11
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 11
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB14_12
+; RV64-NEXT: # %bb.11:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB14_12:
+; RV64-NEXT: fsw ft1, 44(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 10
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 10
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB14_14
+; RV64-NEXT: # %bb.13:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB14_14:
+; RV64-NEXT: fsw ft1, 40(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 9
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 9
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB14_16
+; RV64-NEXT: # %bb.15:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB14_16:
+; RV64-NEXT: fsw ft1, 36(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 8
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 8
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB14_18
+; RV64-NEXT: # %bb.17:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB14_18:
+; RV64-NEXT: fsw ft1, 32(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 7
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 7
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB14_20
+; RV64-NEXT: # %bb.19:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB14_20:
+; RV64-NEXT: fsw ft1, 28(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 6
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 6
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB14_22
+; RV64-NEXT: # %bb.21:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB14_22:
+; RV64-NEXT: fsw ft1, 24(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 5
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 5
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB14_24
+; RV64-NEXT: # %bb.23:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB14_24:
+; RV64-NEXT: fsw ft1, 20(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 4
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 4
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB14_26
+; RV64-NEXT: # %bb.25:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB14_26:
+; RV64-NEXT: fsw ft1, 16(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 3
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 3
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB14_28
+; RV64-NEXT: # %bb.27:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB14_28:
+; RV64-NEXT: fsw ft1, 12(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 2
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 2
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB14_30
+; RV64-NEXT: # %bb.29:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB14_30:
+; RV64-NEXT: fsw ft1, 8(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 1
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 1
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB14_32
+; RV64-NEXT: # %bb.31:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB14_32:
+; RV64-NEXT: fsw ft1, 4(sp)
+; RV64-NEXT: vsetivli a0, 16, e32,m4,ta,mu
+; RV64-NEXT: vle32.v v8, (sp)
+; RV64-NEXT: addi sp, s0, -128
+; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 128
+; RV64-NEXT: ret
+ %v = select i1 %c, <16 x float> %a, <16 x float> %b
+ ret <16 x float> %v
+}
+
+define <16 x float> @selectcc_v16f32(float %a, float %b, <16 x float> %c, <16 x float> %d) {
+; RV32-LABEL: selectcc_v16f32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -128
+; RV32-NEXT: .cfi_def_cfa_offset 128
+; RV32-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 128
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: andi sp, sp, -64
+; RV32-NEXT: feq.s a0, fa0, fa1
+; RV32-NEXT: vsetvli zero, zero, e32,m4,ta,mu
+; RV32-NEXT: vfmv.f.s ft1, v12
+; RV32-NEXT: vfmv.f.s ft0, v8
+; RV32-NEXT: bnez a0, .LBB15_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: fmv.s ft0, ft1
+; RV32-NEXT: .LBB15_2:
+; RV32-NEXT: fsw ft0, 0(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 15
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 15
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB15_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB15_4:
+; RV32-NEXT: fsw ft1, 60(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 14
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 14
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB15_6
+; RV32-NEXT: # %bb.5:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB15_6:
+; RV32-NEXT: fsw ft1, 56(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 13
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 13
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB15_8
+; RV32-NEXT: # %bb.7:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB15_8:
+; RV32-NEXT: fsw ft1, 52(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 12
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 12
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB15_10
+; RV32-NEXT: # %bb.9:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB15_10:
+; RV32-NEXT: fsw ft1, 48(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 11
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 11
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB15_12
+; RV32-NEXT: # %bb.11:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB15_12:
+; RV32-NEXT: fsw ft1, 44(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 10
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 10
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB15_14
+; RV32-NEXT: # %bb.13:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB15_14:
+; RV32-NEXT: fsw ft1, 40(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 9
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 9
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB15_16
+; RV32-NEXT: # %bb.15:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB15_16:
+; RV32-NEXT: fsw ft1, 36(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 8
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 8
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB15_18
+; RV32-NEXT: # %bb.17:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB15_18:
+; RV32-NEXT: fsw ft1, 32(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 7
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 7
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB15_20
+; RV32-NEXT: # %bb.19:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB15_20:
+; RV32-NEXT: fsw ft1, 28(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 6
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 6
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB15_22
+; RV32-NEXT: # %bb.21:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB15_22:
+; RV32-NEXT: fsw ft1, 24(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 5
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 5
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB15_24
+; RV32-NEXT: # %bb.23:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB15_24:
+; RV32-NEXT: fsw ft1, 20(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 4
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 4
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB15_26
+; RV32-NEXT: # %bb.25:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB15_26:
+; RV32-NEXT: fsw ft1, 16(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 3
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 3
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB15_28
+; RV32-NEXT: # %bb.27:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB15_28:
+; RV32-NEXT: fsw ft1, 12(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 2
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 2
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB15_30
+; RV32-NEXT: # %bb.29:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB15_30:
+; RV32-NEXT: fsw ft1, 8(sp)
+; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 1
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 1
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB15_32
+; RV32-NEXT: # %bb.31:
+; RV32-NEXT: fmv.s ft1, ft0
+; RV32-NEXT: .LBB15_32:
+; RV32-NEXT: fsw ft1, 4(sp)
+; RV32-NEXT: vsetivli a0, 16, e32,m4,ta,mu
+; RV32-NEXT: vle32.v v8, (sp)
+; RV32-NEXT: addi sp, s0, -128
+; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 128
+; RV32-NEXT: ret
+;
+; RV64-LABEL: selectcc_v16f32:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -128
+; RV64-NEXT: .cfi_def_cfa_offset 128
+; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 128
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: andi sp, sp, -64
+; RV64-NEXT: feq.s a0, fa0, fa1
+; RV64-NEXT: vsetvli zero, zero, e32,m4,ta,mu
+; RV64-NEXT: vfmv.f.s ft1, v12
+; RV64-NEXT: vfmv.f.s ft0, v8
+; RV64-NEXT: bnez a0, .LBB15_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: fmv.s ft0, ft1
+; RV64-NEXT: .LBB15_2:
+; RV64-NEXT: fsw ft0, 0(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 15
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 15
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB15_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB15_4:
+; RV64-NEXT: fsw ft1, 60(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 14
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 14
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB15_6
+; RV64-NEXT: # %bb.5:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB15_6:
+; RV64-NEXT: fsw ft1, 56(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 13
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 13
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB15_8
+; RV64-NEXT: # %bb.7:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB15_8:
+; RV64-NEXT: fsw ft1, 52(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 12
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 12
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB15_10
+; RV64-NEXT: # %bb.9:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB15_10:
+; RV64-NEXT: fsw ft1, 48(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 11
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 11
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB15_12
+; RV64-NEXT: # %bb.11:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB15_12:
+; RV64-NEXT: fsw ft1, 44(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 10
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 10
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB15_14
+; RV64-NEXT: # %bb.13:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB15_14:
+; RV64-NEXT: fsw ft1, 40(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 9
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 9
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB15_16
+; RV64-NEXT: # %bb.15:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB15_16:
+; RV64-NEXT: fsw ft1, 36(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 8
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 8
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB15_18
+; RV64-NEXT: # %bb.17:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB15_18:
+; RV64-NEXT: fsw ft1, 32(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 7
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 7
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB15_20
+; RV64-NEXT: # %bb.19:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB15_20:
+; RV64-NEXT: fsw ft1, 28(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 6
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 6
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB15_22
+; RV64-NEXT: # %bb.21:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB15_22:
+; RV64-NEXT: fsw ft1, 24(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 5
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 5
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB15_24
+; RV64-NEXT: # %bb.23:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB15_24:
+; RV64-NEXT: fsw ft1, 20(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 4
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 4
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB15_26
+; RV64-NEXT: # %bb.25:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB15_26:
+; RV64-NEXT: fsw ft1, 16(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 3
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 3
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB15_28
+; RV64-NEXT: # %bb.27:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB15_28:
+; RV64-NEXT: fsw ft1, 12(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 2
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 2
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB15_30
+; RV64-NEXT: # %bb.29:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB15_30:
+; RV64-NEXT: fsw ft1, 8(sp)
+; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 1
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 1
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB15_32
+; RV64-NEXT: # %bb.31:
+; RV64-NEXT: fmv.s ft1, ft0
+; RV64-NEXT: .LBB15_32:
+; RV64-NEXT: fsw ft1, 4(sp)
+; RV64-NEXT: vsetivli a0, 16, e32,m4,ta,mu
+; RV64-NEXT: vle32.v v8, (sp)
+; RV64-NEXT: addi sp, s0, -128
+; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 128
+; RV64-NEXT: ret
+ %cmp = fcmp oeq float %a, %b
+ %v = select i1 %cmp, <16 x float> %c, <16 x float> %d
+ ret <16 x float> %v
+}
+
+define <2 x double> @select_v2f64(i1 zeroext %c, <2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: select_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT: vfmv.f.s ft1, v9
+; CHECK-NEXT: vfmv.f.s ft0, v8
+; CHECK-NEXT: vslidedown.vi v25, v9, 1
+; CHECK-NEXT: vfmv.f.s ft3, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 1
+; CHECK-NEXT: vfmv.f.s ft2, v25
+; CHECK-NEXT: bnez a0, .LBB16_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: fmv.d ft0, ft1
+; CHECK-NEXT: fmv.d ft2, ft3
+; CHECK-NEXT: .LBB16_2:
+; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; CHECK-NEXT: vfmv.v.f v8, ft2
+; CHECK-NEXT: vfmv.s.f v8, ft0
+; CHECK-NEXT: ret
+ %v = select i1 %c, <2 x double> %a, <2 x double> %b
+ ret <2 x double> %v
+}
+
+define <2 x double> @selectcc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d) {
+; CHECK-LABEL: selectcc_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: feq.d a0, fa0, fa1
+; CHECK-NEXT: vsetivli a1, 1, e64,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v9, 1
+; CHECK-NEXT: vfmv.f.s ft1, v25
+; CHECK-NEXT: vslidedown.vi v25, v8, 1
+; CHECK-NEXT: vfmv.f.s ft0, v25
+; CHECK-NEXT: bnez a0, .LBB17_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: fmv.d ft0, ft1
+; CHECK-NEXT: .LBB17_2:
+; CHECK-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; CHECK-NEXT: vfmv.v.f v25, ft0
+; CHECK-NEXT: vfmv.f.s ft1, v9
+; CHECK-NEXT: vfmv.f.s ft0, v8
+; CHECK-NEXT: bnez a0, .LBB17_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: fmv.d ft0, ft1
+; CHECK-NEXT: .LBB17_4:
+; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; CHECK-NEXT: vfmv.s.f v25, ft0
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq double %a, %b
+ %v = select i1 %cmp, <2 x double> %c, <2 x double> %d
+ ret <2 x double> %v
+}
+
+define <4 x double> @select_v4f64(i1 zeroext %c, <4 x double> %a, <4 x double> %b) {
+; RV32-LABEL: select_v4f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -64
+; RV32-NEXT: .cfi_def_cfa_offset 64
+; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 64
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: andi sp, sp, -32
+; RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu
+; RV32-NEXT: vfmv.f.s ft1, v10
+; RV32-NEXT: vfmv.f.s ft0, v8
+; RV32-NEXT: bnez a0, .LBB18_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: fmv.d ft0, ft1
+; RV32-NEXT: .LBB18_2:
+; RV32-NEXT: fsd ft0, 0(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 3
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 3
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB18_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB18_4:
+; RV32-NEXT: fsd ft1, 24(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 2
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 2
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB18_6
+; RV32-NEXT: # %bb.5:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB18_6:
+; RV32-NEXT: fsd ft1, 16(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 1
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 1
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB18_8
+; RV32-NEXT: # %bb.7:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB18_8:
+; RV32-NEXT: fsd ft1, 8(sp)
+; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; RV32-NEXT: vle64.v v8, (sp)
+; RV32-NEXT: addi sp, s0, -64
+; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 64
+; RV32-NEXT: ret
+;
+; RV64-LABEL: select_v4f64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -64
+; RV64-NEXT: .cfi_def_cfa_offset 64
+; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 64
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: andi sp, sp, -32
+; RV64-NEXT: vsetvli zero, zero, e64,m2,ta,mu
+; RV64-NEXT: vfmv.f.s ft1, v10
+; RV64-NEXT: vfmv.f.s ft0, v8
+; RV64-NEXT: bnez a0, .LBB18_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: fmv.d ft0, ft1
+; RV64-NEXT: .LBB18_2:
+; RV64-NEXT: fsd ft0, 0(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 3
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 3
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB18_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB18_4:
+; RV64-NEXT: fsd ft1, 24(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 2
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 2
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB18_6
+; RV64-NEXT: # %bb.5:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB18_6:
+; RV64-NEXT: fsd ft1, 16(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 1
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 1
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB18_8
+; RV64-NEXT: # %bb.7:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB18_8:
+; RV64-NEXT: fsd ft1, 8(sp)
+; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; RV64-NEXT: vle64.v v8, (sp)
+; RV64-NEXT: addi sp, s0, -64
+; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 64
+; RV64-NEXT: ret
+ %v = select i1 %c, <4 x double> %a, <4 x double> %b
+ ret <4 x double> %v
+}
+
+define <4 x double> @selectcc_v4f64(double %a, double %b, <4 x double> %c, <4 x double> %d) {
+; RV32-LABEL: selectcc_v4f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -64
+; RV32-NEXT: .cfi_def_cfa_offset 64
+; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 64
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: andi sp, sp, -32
+; RV32-NEXT: feq.d a0, fa0, fa1
+; RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu
+; RV32-NEXT: vfmv.f.s ft1, v10
+; RV32-NEXT: vfmv.f.s ft0, v8
+; RV32-NEXT: bnez a0, .LBB19_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: fmv.d ft0, ft1
+; RV32-NEXT: .LBB19_2:
+; RV32-NEXT: fsd ft0, 0(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 3
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 3
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB19_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB19_4:
+; RV32-NEXT: fsd ft1, 24(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 2
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 2
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB19_6
+; RV32-NEXT: # %bb.5:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB19_6:
+; RV32-NEXT: fsd ft1, 16(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v10, 1
+; RV32-NEXT: vfmv.f.s ft0, v26
+; RV32-NEXT: vslidedown.vi v26, v8, 1
+; RV32-NEXT: vfmv.f.s ft1, v26
+; RV32-NEXT: bnez a0, .LBB19_8
+; RV32-NEXT: # %bb.7:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB19_8:
+; RV32-NEXT: fsd ft1, 8(sp)
+; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; RV32-NEXT: vle64.v v8, (sp)
+; RV32-NEXT: addi sp, s0, -64
+; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 64
+; RV32-NEXT: ret
+;
+; RV64-LABEL: selectcc_v4f64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -64
+; RV64-NEXT: .cfi_def_cfa_offset 64
+; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 64
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: andi sp, sp, -32
+; RV64-NEXT: feq.d a0, fa0, fa1
+; RV64-NEXT: vsetvli zero, zero, e64,m2,ta,mu
+; RV64-NEXT: vfmv.f.s ft1, v10
+; RV64-NEXT: vfmv.f.s ft0, v8
+; RV64-NEXT: bnez a0, .LBB19_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: fmv.d ft0, ft1
+; RV64-NEXT: .LBB19_2:
+; RV64-NEXT: fsd ft0, 0(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 3
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 3
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB19_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB19_4:
+; RV64-NEXT: fsd ft1, 24(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 2
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 2
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB19_6
+; RV64-NEXT: # %bb.5:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB19_6:
+; RV64-NEXT: fsd ft1, 16(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu
+; RV64-NEXT: vslidedown.vi v26, v10, 1
+; RV64-NEXT: vfmv.f.s ft0, v26
+; RV64-NEXT: vslidedown.vi v26, v8, 1
+; RV64-NEXT: vfmv.f.s ft1, v26
+; RV64-NEXT: bnez a0, .LBB19_8
+; RV64-NEXT: # %bb.7:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB19_8:
+; RV64-NEXT: fsd ft1, 8(sp)
+; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; RV64-NEXT: vle64.v v8, (sp)
+; RV64-NEXT: addi sp, s0, -64
+; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 64
+; RV64-NEXT: ret
+ %cmp = fcmp oeq double %a, %b
+ %v = select i1 %cmp, <4 x double> %c, <4 x double> %d
+ ret <4 x double> %v
+}
+
+define <8 x double> @select_v8f64(i1 zeroext %c, <8 x double> %a, <8 x double> %b) {
+; RV32-LABEL: select_v8f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -128
+; RV32-NEXT: .cfi_def_cfa_offset 128
+; RV32-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 128
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: andi sp, sp, -64
+; RV32-NEXT: vsetvli zero, zero, e64,m4,ta,mu
+; RV32-NEXT: vfmv.f.s ft1, v12
+; RV32-NEXT: vfmv.f.s ft0, v8
+; RV32-NEXT: bnez a0, .LBB20_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: fmv.d ft0, ft1
+; RV32-NEXT: .LBB20_2:
+; RV32-NEXT: fsd ft0, 0(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 7
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 7
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB20_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB20_4:
+; RV32-NEXT: fsd ft1, 56(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 6
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 6
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB20_6
+; RV32-NEXT: # %bb.5:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB20_6:
+; RV32-NEXT: fsd ft1, 48(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 5
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 5
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB20_8
+; RV32-NEXT: # %bb.7:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB20_8:
+; RV32-NEXT: fsd ft1, 40(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 4
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 4
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB20_10
+; RV32-NEXT: # %bb.9:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB20_10:
+; RV32-NEXT: fsd ft1, 32(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 3
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 3
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB20_12
+; RV32-NEXT: # %bb.11:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB20_12:
+; RV32-NEXT: fsd ft1, 24(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 2
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 2
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB20_14
+; RV32-NEXT: # %bb.13:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB20_14:
+; RV32-NEXT: fsd ft1, 16(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 1
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 1
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB20_16
+; RV32-NEXT: # %bb.15:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB20_16:
+; RV32-NEXT: fsd ft1, 8(sp)
+; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; RV32-NEXT: vle64.v v8, (sp)
+; RV32-NEXT: addi sp, s0, -128
+; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 128
+; RV32-NEXT: ret
+;
+; RV64-LABEL: select_v8f64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -128
+; RV64-NEXT: .cfi_def_cfa_offset 128
+; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 128
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: andi sp, sp, -64
+; RV64-NEXT: vsetvli zero, zero, e64,m4,ta,mu
+; RV64-NEXT: vfmv.f.s ft1, v12
+; RV64-NEXT: vfmv.f.s ft0, v8
+; RV64-NEXT: bnez a0, .LBB20_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: fmv.d ft0, ft1
+; RV64-NEXT: .LBB20_2:
+; RV64-NEXT: fsd ft0, 0(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 7
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 7
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB20_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB20_4:
+; RV64-NEXT: fsd ft1, 56(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 6
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 6
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB20_6
+; RV64-NEXT: # %bb.5:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB20_6:
+; RV64-NEXT: fsd ft1, 48(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 5
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 5
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB20_8
+; RV64-NEXT: # %bb.7:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB20_8:
+; RV64-NEXT: fsd ft1, 40(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 4
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 4
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB20_10
+; RV64-NEXT: # %bb.9:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB20_10:
+; RV64-NEXT: fsd ft1, 32(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 3
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 3
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB20_12
+; RV64-NEXT: # %bb.11:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB20_12:
+; RV64-NEXT: fsd ft1, 24(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 2
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 2
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB20_14
+; RV64-NEXT: # %bb.13:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB20_14:
+; RV64-NEXT: fsd ft1, 16(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 1
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 1
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB20_16
+; RV64-NEXT: # %bb.15:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB20_16:
+; RV64-NEXT: fsd ft1, 8(sp)
+; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; RV64-NEXT: vle64.v v8, (sp)
+; RV64-NEXT: addi sp, s0, -128
+; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 128
+; RV64-NEXT: ret
+ %v = select i1 %c, <8 x double> %a, <8 x double> %b
+ ret <8 x double> %v
+}
+
+define <8 x double> @selectcc_v8f64(double %a, double %b, <8 x double> %c, <8 x double> %d) {
+; RV32-LABEL: selectcc_v8f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -128
+; RV32-NEXT: .cfi_def_cfa_offset 128
+; RV32-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 128
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: andi sp, sp, -64
+; RV32-NEXT: feq.d a0, fa0, fa1
+; RV32-NEXT: vsetvli zero, zero, e64,m4,ta,mu
+; RV32-NEXT: vfmv.f.s ft1, v12
+; RV32-NEXT: vfmv.f.s ft0, v8
+; RV32-NEXT: bnez a0, .LBB21_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: fmv.d ft0, ft1
+; RV32-NEXT: .LBB21_2:
+; RV32-NEXT: fsd ft0, 0(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 7
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 7
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB21_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB21_4:
+; RV32-NEXT: fsd ft1, 56(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 6
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 6
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB21_6
+; RV32-NEXT: # %bb.5:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB21_6:
+; RV32-NEXT: fsd ft1, 48(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 5
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 5
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB21_8
+; RV32-NEXT: # %bb.7:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB21_8:
+; RV32-NEXT: fsd ft1, 40(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 4
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 4
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB21_10
+; RV32-NEXT: # %bb.9:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB21_10:
+; RV32-NEXT: fsd ft1, 32(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 3
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 3
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB21_12
+; RV32-NEXT: # %bb.11:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB21_12:
+; RV32-NEXT: fsd ft1, 24(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 2
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 2
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB21_14
+; RV32-NEXT: # %bb.13:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB21_14:
+; RV32-NEXT: fsd ft1, 16(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v12, 1
+; RV32-NEXT: vfmv.f.s ft0, v28
+; RV32-NEXT: vslidedown.vi v28, v8, 1
+; RV32-NEXT: vfmv.f.s ft1, v28
+; RV32-NEXT: bnez a0, .LBB21_16
+; RV32-NEXT: # %bb.15:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB21_16:
+; RV32-NEXT: fsd ft1, 8(sp)
+; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; RV32-NEXT: vle64.v v8, (sp)
+; RV32-NEXT: addi sp, s0, -128
+; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 128
+; RV32-NEXT: ret
+;
+; RV64-LABEL: selectcc_v8f64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -128
+; RV64-NEXT: .cfi_def_cfa_offset 128
+; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 128
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: andi sp, sp, -64
+; RV64-NEXT: feq.d a0, fa0, fa1
+; RV64-NEXT: vsetvli zero, zero, e64,m4,ta,mu
+; RV64-NEXT: vfmv.f.s ft1, v12
+; RV64-NEXT: vfmv.f.s ft0, v8
+; RV64-NEXT: bnez a0, .LBB21_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: fmv.d ft0, ft1
+; RV64-NEXT: .LBB21_2:
+; RV64-NEXT: fsd ft0, 0(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 7
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 7
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB21_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB21_4:
+; RV64-NEXT: fsd ft1, 56(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 6
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 6
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB21_6
+; RV64-NEXT: # %bb.5:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB21_6:
+; RV64-NEXT: fsd ft1, 48(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 5
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 5
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB21_8
+; RV64-NEXT: # %bb.7:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB21_8:
+; RV64-NEXT: fsd ft1, 40(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 4
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 4
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB21_10
+; RV64-NEXT: # %bb.9:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB21_10:
+; RV64-NEXT: fsd ft1, 32(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 3
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 3
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB21_12
+; RV64-NEXT: # %bb.11:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB21_12:
+; RV64-NEXT: fsd ft1, 24(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 2
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 2
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB21_14
+; RV64-NEXT: # %bb.13:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB21_14:
+; RV64-NEXT: fsd ft1, 16(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu
+; RV64-NEXT: vslidedown.vi v28, v12, 1
+; RV64-NEXT: vfmv.f.s ft0, v28
+; RV64-NEXT: vslidedown.vi v28, v8, 1
+; RV64-NEXT: vfmv.f.s ft1, v28
+; RV64-NEXT: bnez a0, .LBB21_16
+; RV64-NEXT: # %bb.15:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB21_16:
+; RV64-NEXT: fsd ft1, 8(sp)
+; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; RV64-NEXT: vle64.v v8, (sp)
+; RV64-NEXT: addi sp, s0, -128
+; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 128
+; RV64-NEXT: ret
+ %cmp = fcmp oeq double %a, %b
+ %v = select i1 %cmp, <8 x double> %c, <8 x double> %d
+ ret <8 x double> %v
+}
+
+define <16 x double> @select_v16f64(i1 zeroext %c, <16 x double> %a, <16 x double> %b) {
+; RV32-LABEL: select_v16f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -256
+; RV32-NEXT: .cfi_def_cfa_offset 256
+; RV32-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 248(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 256
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: andi sp, sp, -128
+; RV32-NEXT: vsetvli zero, zero, e64,m8,ta,mu
+; RV32-NEXT: vfmv.f.s ft1, v16
+; RV32-NEXT: vfmv.f.s ft0, v8
+; RV32-NEXT: bnez a0, .LBB22_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: fmv.d ft0, ft1
+; RV32-NEXT: .LBB22_2:
+; RV32-NEXT: fsd ft0, 0(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 15
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 15
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB22_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB22_4:
+; RV32-NEXT: fsd ft1, 120(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 14
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 14
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB22_6
+; RV32-NEXT: # %bb.5:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB22_6:
+; RV32-NEXT: fsd ft1, 112(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 13
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 13
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB22_8
+; RV32-NEXT: # %bb.7:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB22_8:
+; RV32-NEXT: fsd ft1, 104(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 12
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 12
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB22_10
+; RV32-NEXT: # %bb.9:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB22_10:
+; RV32-NEXT: fsd ft1, 96(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 11
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 11
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB22_12
+; RV32-NEXT: # %bb.11:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB22_12:
+; RV32-NEXT: fsd ft1, 88(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 10
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 10
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB22_14
+; RV32-NEXT: # %bb.13:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB22_14:
+; RV32-NEXT: fsd ft1, 80(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 9
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 9
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB22_16
+; RV32-NEXT: # %bb.15:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB22_16:
+; RV32-NEXT: fsd ft1, 72(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 8
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 8
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB22_18
+; RV32-NEXT: # %bb.17:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB22_18:
+; RV32-NEXT: fsd ft1, 64(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 7
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 7
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB22_20
+; RV32-NEXT: # %bb.19:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB22_20:
+; RV32-NEXT: fsd ft1, 56(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 6
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 6
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB22_22
+; RV32-NEXT: # %bb.21:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB22_22:
+; RV32-NEXT: fsd ft1, 48(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 5
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 5
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB22_24
+; RV32-NEXT: # %bb.23:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB22_24:
+; RV32-NEXT: fsd ft1, 40(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 4
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 4
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB22_26
+; RV32-NEXT: # %bb.25:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB22_26:
+; RV32-NEXT: fsd ft1, 32(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 3
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 3
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB22_28
+; RV32-NEXT: # %bb.27:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB22_28:
+; RV32-NEXT: fsd ft1, 24(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 2
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 2
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB22_30
+; RV32-NEXT: # %bb.29:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB22_30:
+; RV32-NEXT: fsd ft1, 16(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v16, v16, 1
+; RV32-NEXT: vfmv.f.s ft0, v16
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vfmv.f.s ft1, v8
+; RV32-NEXT: bnez a0, .LBB22_32
+; RV32-NEXT: # %bb.31:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB22_32:
+; RV32-NEXT: fsd ft1, 8(sp)
+; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu
+; RV32-NEXT: vle64.v v8, (sp)
+; RV32-NEXT: addi sp, s0, -256
+; RV32-NEXT: lw s0, 248(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 256
+; RV32-NEXT: ret
+;
+; RV64-LABEL: select_v16f64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -256
+; RV64-NEXT: .cfi_def_cfa_offset 256
+; RV64-NEXT: sd ra, 248(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 240(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 256
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: andi sp, sp, -128
+; RV64-NEXT: vsetvli zero, zero, e64,m8,ta,mu
+; RV64-NEXT: vfmv.f.s ft1, v16
+; RV64-NEXT: vfmv.f.s ft0, v8
+; RV64-NEXT: bnez a0, .LBB22_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: fmv.d ft0, ft1
+; RV64-NEXT: .LBB22_2:
+; RV64-NEXT: fsd ft0, 0(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 15
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 15
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB22_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB22_4:
+; RV64-NEXT: fsd ft1, 120(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 14
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 14
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB22_6
+; RV64-NEXT: # %bb.5:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB22_6:
+; RV64-NEXT: fsd ft1, 112(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 13
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 13
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB22_8
+; RV64-NEXT: # %bb.7:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB22_8:
+; RV64-NEXT: fsd ft1, 104(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 12
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 12
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB22_10
+; RV64-NEXT: # %bb.9:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB22_10:
+; RV64-NEXT: fsd ft1, 96(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 11
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 11
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB22_12
+; RV64-NEXT: # %bb.11:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB22_12:
+; RV64-NEXT: fsd ft1, 88(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 10
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 10
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB22_14
+; RV64-NEXT: # %bb.13:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB22_14:
+; RV64-NEXT: fsd ft1, 80(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 9
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 9
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB22_16
+; RV64-NEXT: # %bb.15:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB22_16:
+; RV64-NEXT: fsd ft1, 72(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 8
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 8
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB22_18
+; RV64-NEXT: # %bb.17:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB22_18:
+; RV64-NEXT: fsd ft1, 64(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 7
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 7
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB22_20
+; RV64-NEXT: # %bb.19:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB22_20:
+; RV64-NEXT: fsd ft1, 56(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 6
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 6
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB22_22
+; RV64-NEXT: # %bb.21:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB22_22:
+; RV64-NEXT: fsd ft1, 48(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 5
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 5
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB22_24
+; RV64-NEXT: # %bb.23:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB22_24:
+; RV64-NEXT: fsd ft1, 40(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 4
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 4
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB22_26
+; RV64-NEXT: # %bb.25:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB22_26:
+; RV64-NEXT: fsd ft1, 32(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 3
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 3
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB22_28
+; RV64-NEXT: # %bb.27:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB22_28:
+; RV64-NEXT: fsd ft1, 24(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 2
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 2
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB22_30
+; RV64-NEXT: # %bb.29:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB22_30:
+; RV64-NEXT: fsd ft1, 16(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v16, v16, 1
+; RV64-NEXT: vfmv.f.s ft0, v16
+; RV64-NEXT: vslidedown.vi v8, v8, 1
+; RV64-NEXT: vfmv.f.s ft1, v8
+; RV64-NEXT: bnez a0, .LBB22_32
+; RV64-NEXT: # %bb.31:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB22_32:
+; RV64-NEXT: fsd ft1, 8(sp)
+; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu
+; RV64-NEXT: vle64.v v8, (sp)
+; RV64-NEXT: addi sp, s0, -256
+; RV64-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld ra, 248(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 256
+; RV64-NEXT: ret
+ %v = select i1 %c, <16 x double> %a, <16 x double> %b
+ ret <16 x double> %v
+}
+
+define <16 x double> @selectcc_v16f64(double %a, double %b, <16 x double> %c, <16 x double> %d) {
+; RV32-LABEL: selectcc_v16f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -256
+; RV32-NEXT: .cfi_def_cfa_offset 256
+; RV32-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 248(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 256
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: andi sp, sp, -128
+; RV32-NEXT: feq.d a0, fa0, fa1
+; RV32-NEXT: vsetvli zero, zero, e64,m8,ta,mu
+; RV32-NEXT: vfmv.f.s ft1, v16
+; RV32-NEXT: vfmv.f.s ft0, v8
+; RV32-NEXT: bnez a0, .LBB23_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: fmv.d ft0, ft1
+; RV32-NEXT: .LBB23_2:
+; RV32-NEXT: fsd ft0, 0(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 15
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 15
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB23_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB23_4:
+; RV32-NEXT: fsd ft1, 120(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 14
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 14
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB23_6
+; RV32-NEXT: # %bb.5:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB23_6:
+; RV32-NEXT: fsd ft1, 112(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 13
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 13
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB23_8
+; RV32-NEXT: # %bb.7:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB23_8:
+; RV32-NEXT: fsd ft1, 104(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 12
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 12
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB23_10
+; RV32-NEXT: # %bb.9:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB23_10:
+; RV32-NEXT: fsd ft1, 96(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 11
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 11
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB23_12
+; RV32-NEXT: # %bb.11:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB23_12:
+; RV32-NEXT: fsd ft1, 88(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 10
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 10
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB23_14
+; RV32-NEXT: # %bb.13:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB23_14:
+; RV32-NEXT: fsd ft1, 80(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 9
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 9
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB23_16
+; RV32-NEXT: # %bb.15:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB23_16:
+; RV32-NEXT: fsd ft1, 72(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 8
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 8
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB23_18
+; RV32-NEXT: # %bb.17:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB23_18:
+; RV32-NEXT: fsd ft1, 64(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 7
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 7
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB23_20
+; RV32-NEXT: # %bb.19:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB23_20:
+; RV32-NEXT: fsd ft1, 56(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 6
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 6
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB23_22
+; RV32-NEXT: # %bb.21:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB23_22:
+; RV32-NEXT: fsd ft1, 48(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 5
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 5
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB23_24
+; RV32-NEXT: # %bb.23:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB23_24:
+; RV32-NEXT: fsd ft1, 40(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 4
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 4
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB23_26
+; RV32-NEXT: # %bb.25:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB23_26:
+; RV32-NEXT: fsd ft1, 32(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 3
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 3
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB23_28
+; RV32-NEXT: # %bb.27:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB23_28:
+; RV32-NEXT: fsd ft1, 24(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v24, v16, 2
+; RV32-NEXT: vfmv.f.s ft0, v24
+; RV32-NEXT: vslidedown.vi v24, v8, 2
+; RV32-NEXT: vfmv.f.s ft1, v24
+; RV32-NEXT: bnez a0, .LBB23_30
+; RV32-NEXT: # %bb.29:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB23_30:
+; RV32-NEXT: fsd ft1, 16(sp)
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v16, v16, 1
+; RV32-NEXT: vfmv.f.s ft0, v16
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vfmv.f.s ft1, v8
+; RV32-NEXT: bnez a0, .LBB23_32
+; RV32-NEXT: # %bb.31:
+; RV32-NEXT: fmv.d ft1, ft0
+; RV32-NEXT: .LBB23_32:
+; RV32-NEXT: fsd ft1, 8(sp)
+; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu
+; RV32-NEXT: vle64.v v8, (sp)
+; RV32-NEXT: addi sp, s0, -256
+; RV32-NEXT: lw s0, 248(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 256
+; RV32-NEXT: ret
+;
+; RV64-LABEL: selectcc_v16f64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -256
+; RV64-NEXT: .cfi_def_cfa_offset 256
+; RV64-NEXT: sd ra, 248(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 240(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 256
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: andi sp, sp, -128
+; RV64-NEXT: feq.d a0, fa0, fa1
+; RV64-NEXT: vsetvli zero, zero, e64,m8,ta,mu
+; RV64-NEXT: vfmv.f.s ft1, v16
+; RV64-NEXT: vfmv.f.s ft0, v8
+; RV64-NEXT: bnez a0, .LBB23_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: fmv.d ft0, ft1
+; RV64-NEXT: .LBB23_2:
+; RV64-NEXT: fsd ft0, 0(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 15
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 15
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB23_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB23_4:
+; RV64-NEXT: fsd ft1, 120(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 14
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 14
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB23_6
+; RV64-NEXT: # %bb.5:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB23_6:
+; RV64-NEXT: fsd ft1, 112(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 13
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 13
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB23_8
+; RV64-NEXT: # %bb.7:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB23_8:
+; RV64-NEXT: fsd ft1, 104(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 12
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 12
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB23_10
+; RV64-NEXT: # %bb.9:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB23_10:
+; RV64-NEXT: fsd ft1, 96(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 11
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 11
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB23_12
+; RV64-NEXT: # %bb.11:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB23_12:
+; RV64-NEXT: fsd ft1, 88(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 10
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 10
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB23_14
+; RV64-NEXT: # %bb.13:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB23_14:
+; RV64-NEXT: fsd ft1, 80(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 9
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 9
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB23_16
+; RV64-NEXT: # %bb.15:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB23_16:
+; RV64-NEXT: fsd ft1, 72(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 8
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 8
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB23_18
+; RV64-NEXT: # %bb.17:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB23_18:
+; RV64-NEXT: fsd ft1, 64(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 7
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 7
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB23_20
+; RV64-NEXT: # %bb.19:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB23_20:
+; RV64-NEXT: fsd ft1, 56(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 6
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 6
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB23_22
+; RV64-NEXT: # %bb.21:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB23_22:
+; RV64-NEXT: fsd ft1, 48(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 5
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 5
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB23_24
+; RV64-NEXT: # %bb.23:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB23_24:
+; RV64-NEXT: fsd ft1, 40(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 4
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 4
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB23_26
+; RV64-NEXT: # %bb.25:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB23_26:
+; RV64-NEXT: fsd ft1, 32(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 3
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 3
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB23_28
+; RV64-NEXT: # %bb.27:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB23_28:
+; RV64-NEXT: fsd ft1, 24(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v24, v16, 2
+; RV64-NEXT: vfmv.f.s ft0, v24
+; RV64-NEXT: vslidedown.vi v24, v8, 2
+; RV64-NEXT: vfmv.f.s ft1, v24
+; RV64-NEXT: bnez a0, .LBB23_30
+; RV64-NEXT: # %bb.29:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB23_30:
+; RV64-NEXT: fsd ft1, 16(sp)
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT: vslidedown.vi v16, v16, 1
+; RV64-NEXT: vfmv.f.s ft0, v16
+; RV64-NEXT: vslidedown.vi v8, v8, 1
+; RV64-NEXT: vfmv.f.s ft1, v8
+; RV64-NEXT: bnez a0, .LBB23_32
+; RV64-NEXT: # %bb.31:
+; RV64-NEXT: fmv.d ft1, ft0
+; RV64-NEXT: .LBB23_32:
+; RV64-NEXT: fsd ft1, 8(sp)
+; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu
+; RV64-NEXT: vle64.v v8, (sp)
+; RV64-NEXT: addi sp, s0, -256
+; RV64-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld ra, 248(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 256
+; RV64-NEXT: ret
+ %cmp = fcmp oeq double %a, %b
+ %v = select i1 %cmp, <16 x double> %c, <16 x double> %d
+ ret <16 x double> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll
new file mode 100644
index 0000000000000..1f01629c252dd
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll
@@ -0,0 +1,1000 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
+define <1 x i1> @select_v1i1(i1 zeroext %c, <1 x i1> %a, <1 x i1> %b) {
+; CHECK-LABEL: select_v1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 1
+; CHECK-NEXT: bnez a0, .LBB0_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a1
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vmandnot.mm v25, v26, v8
+; CHECK-NEXT: vmand.mm v26, v0, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
+; CHECK-NEXT: ret
+ %v = select i1 %c, <1 x i1> %a, <1 x i1> %b
+ ret <1 x i1> %v
+}
+
+define <1 x i1> @selectcc_v1i1(i1 signext %a, i1 signext %b, <1 x i1> %c, <1 x i1> %d) {
+; CHECK-LABEL: selectcc_v1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
+; CHECK-NEXT: andi a1, a0, 1
+; CHECK-NEXT: addi a0, zero, 1
+; CHECK-NEXT: bnez a1, .LBB1_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, zero
+; CHECK-NEXT: .LBB1_2:
+; CHECK-NEXT: vsetivli a1, 1, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a0
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vmandnot.mm v25, v26, v8
+; CHECK-NEXT: vmand.mm v26, v0, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
+; CHECK-NEXT: ret
+ %cmp = icmp ne i1 %a, %b
+ %v = select i1 %cmp, <1 x i1> %c, <1 x i1> %d
+ ret <1 x i1> %v
+}
+
+define <2 x i1> @select_v2i1(i1 zeroext %c, <2 x i1> %a, <2 x i1> %b) {
+; CHECK-LABEL: select_v2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 1
+; CHECK-NEXT: bnez a0, .LBB2_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB2_2:
+; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a1
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vmandnot.mm v25, v26, v8
+; CHECK-NEXT: vmand.mm v26, v0, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
+; CHECK-NEXT: ret
+ %v = select i1 %c, <2 x i1> %a, <2 x i1> %b
+ ret <2 x i1> %v
+}
+
+define <2 x i1> @selectcc_v2i1(i1 signext %a, i1 signext %b, <2 x i1> %c, <2 x i1> %d) {
+; CHECK-LABEL: selectcc_v2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
+; CHECK-NEXT: andi a1, a0, 1
+; CHECK-NEXT: addi a0, zero, 1
+; CHECK-NEXT: bnez a1, .LBB3_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, zero
+; CHECK-NEXT: .LBB3_2:
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a0
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vmandnot.mm v25, v26, v8
+; CHECK-NEXT: vmand.mm v26, v0, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
+; CHECK-NEXT: ret
+ %cmp = icmp ne i1 %a, %b
+ %v = select i1 %cmp, <2 x i1> %c, <2 x i1> %d
+ ret <2 x i1> %v
+}
+
+define <4 x i1> @select_v4i1(i1 zeroext %c, <4 x i1> %a, <4 x i1> %b) {
+; CHECK-LABEL: select_v4i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 1
+; CHECK-NEXT: bnez a0, .LBB4_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB4_2:
+; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a1
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vmandnot.mm v25, v26, v8
+; CHECK-NEXT: vmand.mm v26, v0, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
+; CHECK-NEXT: ret
+ %v = select i1 %c, <4 x i1> %a, <4 x i1> %b
+ ret <4 x i1> %v
+}
+
+define <4 x i1> @selectcc_v4i1(i1 signext %a, i1 signext %b, <4 x i1> %c, <4 x i1> %d) {
+; CHECK-LABEL: selectcc_v4i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
+; CHECK-NEXT: andi a1, a0, 1
+; CHECK-NEXT: addi a0, zero, 1
+; CHECK-NEXT: bnez a1, .LBB5_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, zero
+; CHECK-NEXT: .LBB5_2:
+; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a0
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vmandnot.mm v25, v26, v8
+; CHECK-NEXT: vmand.mm v26, v0, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
+; CHECK-NEXT: ret
+ %cmp = icmp ne i1 %a, %b
+ %v = select i1 %cmp, <4 x i1> %c, <4 x i1> %d
+ ret <4 x i1> %v
+}
+
+define <8 x i1> @select_v8i1(i1 zeroext %c, <8 x i1> %a, <8 x i1> %b) {
+; CHECK-LABEL: select_v8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 1
+; CHECK-NEXT: bnez a0, .LBB6_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB6_2:
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a1
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vmandnot.mm v25, v26, v8
+; CHECK-NEXT: vmand.mm v26, v0, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
+; CHECK-NEXT: ret
+ %v = select i1 %c, <8 x i1> %a, <8 x i1> %b
+ ret <8 x i1> %v
+}
+
+define <8 x i1> @selectcc_v8i1(i1 signext %a, i1 signext %b, <8 x i1> %c, <8 x i1> %d) {
+; CHECK-LABEL: selectcc_v8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
+; CHECK-NEXT: andi a1, a0, 1
+; CHECK-NEXT: addi a0, zero, 1
+; CHECK-NEXT: bnez a1, .LBB7_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, zero
+; CHECK-NEXT: .LBB7_2:
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a0
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vmandnot.mm v25, v26, v8
+; CHECK-NEXT: vmand.mm v26, v0, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
+; CHECK-NEXT: ret
+ %cmp = icmp ne i1 %a, %b
+ %v = select i1 %cmp, <8 x i1> %c, <8 x i1> %d
+ ret <8 x i1> %v
+}
+
+define <16 x i1> @select_v16i1(i1 zeroext %c, <16 x i1> %a, <16 x i1> %b) {
+; CHECK-LABEL: select_v16i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 1
+; CHECK-NEXT: bnez a0, .LBB8_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB8_2:
+; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a1
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vmandnot.mm v25, v26, v8
+; CHECK-NEXT: vmand.mm v26, v0, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
+; CHECK-NEXT: ret
+ %v = select i1 %c, <16 x i1> %a, <16 x i1> %b
+ ret <16 x i1> %v
+}
+
+define <16 x i1> @selectcc_v16i1(i1 signext %a, i1 signext %b, <16 x i1> %c, <16 x i1> %d) {
+; CHECK-LABEL: selectcc_v16i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
+; CHECK-NEXT: andi a1, a0, 1
+; CHECK-NEXT: addi a0, zero, 1
+; CHECK-NEXT: bnez a1, .LBB9_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, zero
+; CHECK-NEXT: .LBB9_2:
+; CHECK-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a0
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vmandnot.mm v25, v26, v8
+; CHECK-NEXT: vmand.mm v26, v0, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
+; CHECK-NEXT: ret
+ %cmp = icmp ne i1 %a, %b
+ %v = select i1 %cmp, <16 x i1> %c, <16 x i1> %d
+ ret <16 x i1> %v
+}
+
+define <2 x i8> @select_v2i8(i1 zeroext %c, <2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: select_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB10_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB10_2:
+; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a1
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <2 x i8> %a, <2 x i8> %b
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @selectcc_v2i8(i8 signext %a, i8 signext %b, <2 x i8> %c, <2 x i8> %d) {
+; CHECK-LABEL: selectcc_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB11_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB11_2:
+; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a2
+; CHECK-NEXT: vmv.v.x v26, a2
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %cmp = icmp ne i8 %a, %b
+ %v = select i1 %cmp, <2 x i8> %c, <2 x i8> %d
+ ret <2 x i8> %v
+}
+
+define <4 x i8> @select_v4i8(i1 zeroext %c, <4 x i8> %a, <4 x i8> %b) {
+; CHECK-LABEL: select_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB12_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB12_2:
+; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a1
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <4 x i8> %a, <4 x i8> %b
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @selectcc_v4i8(i8 signext %a, i8 signext %b, <4 x i8> %c, <4 x i8> %d) {
+; CHECK-LABEL: selectcc_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB13_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB13_2:
+; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a2
+; CHECK-NEXT: vmv.v.x v26, a2
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %cmp = icmp ne i8 %a, %b
+ %v = select i1 %cmp, <4 x i8> %c, <4 x i8> %d
+ ret <4 x i8> %v
+}
+
+define <8 x i8> @select_v8i8(i1 zeroext %c, <8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: select_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB14_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB14_2:
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a1
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <8 x i8> %a, <8 x i8> %b
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @selectcc_v8i8(i8 signext %a, i8 signext %b, <8 x i8> %c, <8 x i8> %d) {
+; CHECK-LABEL: selectcc_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB15_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB15_2:
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a2
+; CHECK-NEXT: vmv.v.x v26, a2
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %cmp = icmp ne i8 %a, %b
+ %v = select i1 %cmp, <8 x i8> %c, <8 x i8> %d
+ ret <8 x i8> %v
+}
+
+define <16 x i8> @select_v16i8(i1 zeroext %c, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: select_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB16_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB16_2:
+; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a1
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @selectcc_v16i8(i8 signext %a, i8 signext %b, <16 x i8> %c, <16 x i8> %d) {
+; CHECK-LABEL: selectcc_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB17_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB17_2:
+; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a2
+; CHECK-NEXT: vmv.v.x v26, a2
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %cmp = icmp ne i8 %a, %b
+ %v = select i1 %cmp, <16 x i8> %c, <16 x i8> %d
+ ret <16 x i8> %v
+}
+
+define <2 x i16> @select_v2i16(i1 zeroext %c, <2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: select_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB18_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB18_2:
+; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a1
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <2 x i16> %a, <2 x i16> %b
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @selectcc_v2i16(i16 signext %a, i16 signext %b, <2 x i16> %c, <2 x i16> %d) {
+; CHECK-LABEL: selectcc_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB19_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB19_2:
+; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a2
+; CHECK-NEXT: vmv.v.x v26, a2
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %cmp = icmp ne i16 %a, %b
+ %v = select i1 %cmp, <2 x i16> %c, <2 x i16> %d
+ ret <2 x i16> %v
+}
+
+define <4 x i16> @select_v4i16(i1 zeroext %c, <4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: select_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB20_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB20_2:
+; CHECK-NEXT: vsetivli a0, 4, e16,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a1
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <4 x i16> %a, <4 x i16> %b
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @selectcc_v4i16(i16 signext %a, i16 signext %b, <4 x i16> %c, <4 x i16> %d) {
+; CHECK-LABEL: selectcc_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB21_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB21_2:
+; CHECK-NEXT: vsetivli a0, 4, e16,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a2
+; CHECK-NEXT: vmv.v.x v26, a2
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %cmp = icmp ne i16 %a, %b
+ %v = select i1 %cmp, <4 x i16> %c, <4 x i16> %d
+ ret <4 x i16> %v
+}
+
+define <8 x i16> @select_v8i16(i1 zeroext %c, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: select_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB22_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB22_2:
+; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a1
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @selectcc_v8i16(i16 signext %a, i16 signext %b, <8 x i16> %c, <8 x i16> %d) {
+; CHECK-LABEL: selectcc_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB23_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB23_2:
+; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a2
+; CHECK-NEXT: vmv.v.x v26, a2
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %cmp = icmp ne i16 %a, %b
+ %v = select i1 %cmp, <8 x i16> %c, <8 x i16> %d
+ ret <8 x i16> %v
+}
+
+define <16 x i16> @select_v16i16(i1 zeroext %c, <16 x i16> %a, <16 x i16> %b) {
+; CHECK-LABEL: select_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB24_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB24_2:
+; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu
+; CHECK-NEXT: vand.vx v26, v8, a1
+; CHECK-NEXT: vmv.v.x v28, a1
+; CHECK-NEXT: vxor.vi v28, v28, -1
+; CHECK-NEXT: vand.vv v28, v10, v28
+; CHECK-NEXT: vor.vv v8, v26, v28
+; CHECK-NEXT: ret
+ %v = select i1 %c, <16 x i16> %a, <16 x i16> %b
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @selectcc_v16i16(i16 signext %a, i16 signext %b, <16 x i16> %c, <16 x i16> %d) {
+; CHECK-LABEL: selectcc_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB25_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB25_2:
+; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu
+; CHECK-NEXT: vand.vx v26, v8, a2
+; CHECK-NEXT: vmv.v.x v28, a2
+; CHECK-NEXT: vxor.vi v28, v28, -1
+; CHECK-NEXT: vand.vv v28, v10, v28
+; CHECK-NEXT: vor.vv v8, v26, v28
+; CHECK-NEXT: ret
+ %cmp = icmp ne i16 %a, %b
+ %v = select i1 %cmp, <16 x i16> %c, <16 x i16> %d
+ ret <16 x i16> %v
+}
+
+define <2 x i32> @select_v2i32(i1 zeroext %c, <2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: select_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB26_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB26_2:
+; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a1
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <2 x i32> %a, <2 x i32> %b
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @selectcc_v2i32(i32 signext %a, i32 signext %b, <2 x i32> %c, <2 x i32> %d) {
+; CHECK-LABEL: selectcc_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB27_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a2
+; CHECK-NEXT: vmv.v.x v26, a2
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %cmp = icmp ne i32 %a, %b
+ %v = select i1 %cmp, <2 x i32> %c, <2 x i32> %d
+ ret <2 x i32> %v
+}
+
+define <4 x i32> @select_v4i32(i1 zeroext %c, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: select_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB28_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB28_2:
+; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a1
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @selectcc_v4i32(i32 signext %a, i32 signext %b, <4 x i32> %c, <4 x i32> %d) {
+; CHECK-LABEL: selectcc_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB29_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB29_2:
+; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a2
+; CHECK-NEXT: vmv.v.x v26, a2
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %cmp = icmp ne i32 %a, %b
+ %v = select i1 %cmp, <4 x i32> %c, <4 x i32> %d
+ ret <4 x i32> %v
+}
+
+define <8 x i32> @select_v8i32(i1 zeroext %c, <8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: select_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB30_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB30_2:
+; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu
+; CHECK-NEXT: vand.vx v26, v8, a1
+; CHECK-NEXT: vmv.v.x v28, a1
+; CHECK-NEXT: vxor.vi v28, v28, -1
+; CHECK-NEXT: vand.vv v28, v10, v28
+; CHECK-NEXT: vor.vv v8, v26, v28
+; CHECK-NEXT: ret
+ %v = select i1 %c, <8 x i32> %a, <8 x i32> %b
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @selectcc_v8i32(i32 signext %a, i32 signext %b, <8 x i32> %c, <8 x i32> %d) {
+; CHECK-LABEL: selectcc_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB31_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB31_2:
+; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu
+; CHECK-NEXT: vand.vx v26, v8, a2
+; CHECK-NEXT: vmv.v.x v28, a2
+; CHECK-NEXT: vxor.vi v28, v28, -1
+; CHECK-NEXT: vand.vv v28, v10, v28
+; CHECK-NEXT: vor.vv v8, v26, v28
+; CHECK-NEXT: ret
+ %cmp = icmp ne i32 %a, %b
+ %v = select i1 %cmp, <8 x i32> %c, <8 x i32> %d
+ ret <8 x i32> %v
+}
+
+define <16 x i32> @select_v16i32(i1 zeroext %c, <16 x i32> %a, <16 x i32> %b) {
+; CHECK-LABEL: select_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB32_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: vsetivli a0, 16, e32,m4,ta,mu
+; CHECK-NEXT: vand.vx v28, v8, a1
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: vxor.vi v8, v8, -1
+; CHECK-NEXT: vand.vv v8, v12, v8
+; CHECK-NEXT: vor.vv v8, v28, v8
+; CHECK-NEXT: ret
+ %v = select i1 %c, <16 x i32> %a, <16 x i32> %b
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @selectcc_v16i32(i32 signext %a, i32 signext %b, <16 x i32> %c, <16 x i32> %d) {
+; CHECK-LABEL: selectcc_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB33_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: vsetivli a0, 16, e32,m4,ta,mu
+; CHECK-NEXT: vand.vx v28, v8, a2
+; CHECK-NEXT: vmv.v.x v8, a2
+; CHECK-NEXT: vxor.vi v8, v8, -1
+; CHECK-NEXT: vand.vv v8, v12, v8
+; CHECK-NEXT: vor.vv v8, v28, v8
+; CHECK-NEXT: ret
+ %cmp = icmp ne i32 %a, %b
+ %v = select i1 %cmp, <16 x i32> %c, <16 x i32> %d
+ ret <16 x i32> %v
+}
+
+define <2 x i64> @select_v2i64(i1 zeroext %c, <2 x i64> %a, <2 x i64> %b) {
+; RV32-LABEL: select_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi a1, zero, -1
+; RV32-NEXT: bnez a0, .LBB34_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: .LBB34_2:
+; RV32-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.x v25, a1
+; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; RV32-NEXT: vand.vv v26, v8, v25
+; RV32-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.i v27, -1
+; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; RV32-NEXT: vxor.vv v25, v25, v27
+; RV32-NEXT: vand.vv v25, v9, v25
+; RV32-NEXT: vor.vv v8, v26, v25
+; RV32-NEXT: ret
+;
+; RV64-LABEL: select_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: bnez a0, .LBB34_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, zero
+; RV64-NEXT: .LBB34_2:
+; RV64-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; RV64-NEXT: vand.vx v25, v8, a1
+; RV64-NEXT: vmv.v.x v26, a1
+; RV64-NEXT: vxor.vi v26, v26, -1
+; RV64-NEXT: vand.vv v26, v9, v26
+; RV64-NEXT: vor.vv v8, v25, v26
+; RV64-NEXT: ret
+ %v = select i1 %c, <2 x i64> %a, <2 x i64> %b
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @selectcc_v2i64(i64 signext %a, i64 signext %b, <2 x i64> %c, <2 x i64> %d) {
+; RV32-LABEL: selectcc_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: xor a1, a1, a3
+; RV32-NEXT: xor a0, a0, a2
+; RV32-NEXT: or a1, a0, a1
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: bnez a1, .LBB35_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, zero
+; RV32-NEXT: .LBB35_2:
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.x v25, a0
+; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; RV32-NEXT: vand.vv v26, v8, v25
+; RV32-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.i v27, -1
+; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; RV32-NEXT: vxor.vv v25, v25, v27
+; RV32-NEXT: vand.vv v25, v9, v25
+; RV32-NEXT: vor.vv v8, v26, v25
+; RV32-NEXT: ret
+;
+; RV64-LABEL: selectcc_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a2, zero, -1
+; RV64-NEXT: bne a0, a1, .LBB35_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a2, zero
+; RV64-NEXT: .LBB35_2:
+; RV64-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; RV64-NEXT: vand.vx v25, v8, a2
+; RV64-NEXT: vmv.v.x v26, a2
+; RV64-NEXT: vxor.vi v26, v26, -1
+; RV64-NEXT: vand.vv v26, v9, v26
+; RV64-NEXT: vor.vv v8, v25, v26
+; RV64-NEXT: ret
+ %cmp = icmp ne i64 %a, %b
+ %v = select i1 %cmp, <2 x i64> %c, <2 x i64> %d
+ ret <2 x i64> %v
+}
+
+define <4 x i64> @select_v4i64(i1 zeroext %c, <4 x i64> %a, <4 x i64> %b) {
+; RV32-LABEL: select_v4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi a1, zero, -1
+; RV32-NEXT: bnez a0, .LBB36_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: .LBB36_2:
+; RV32-NEXT: vsetivli a0, 8, e32,m2,ta,mu
+; RV32-NEXT: vmv.v.x v26, a1
+; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; RV32-NEXT: vand.vv v28, v8, v26
+; RV32-NEXT: vsetivli a0, 8, e32,m2,ta,mu
+; RV32-NEXT: vmv.v.i v30, -1
+; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; RV32-NEXT: vxor.vv v26, v26, v30
+; RV32-NEXT: vand.vv v26, v10, v26
+; RV32-NEXT: vor.vv v8, v28, v26
+; RV32-NEXT: ret
+;
+; RV64-LABEL: select_v4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: bnez a0, .LBB36_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, zero
+; RV64-NEXT: .LBB36_2:
+; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; RV64-NEXT: vand.vx v26, v8, a1
+; RV64-NEXT: vmv.v.x v28, a1
+; RV64-NEXT: vxor.vi v28, v28, -1
+; RV64-NEXT: vand.vv v28, v10, v28
+; RV64-NEXT: vor.vv v8, v26, v28
+; RV64-NEXT: ret
+ %v = select i1 %c, <4 x i64> %a, <4 x i64> %b
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @selectcc_v4i64(i64 signext %a, i64 signext %b, <4 x i64> %c, <4 x i64> %d) {
+; RV32-LABEL: selectcc_v4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: xor a1, a1, a3
+; RV32-NEXT: xor a0, a0, a2
+; RV32-NEXT: or a1, a0, a1
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: bnez a1, .LBB37_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, zero
+; RV32-NEXT: .LBB37_2:
+; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; RV32-NEXT: vmv.v.x v26, a0
+; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; RV32-NEXT: vand.vv v28, v8, v26
+; RV32-NEXT: vsetivli a0, 8, e32,m2,ta,mu
+; RV32-NEXT: vmv.v.i v30, -1
+; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; RV32-NEXT: vxor.vv v26, v26, v30
+; RV32-NEXT: vand.vv v26, v10, v26
+; RV32-NEXT: vor.vv v8, v28, v26
+; RV32-NEXT: ret
+;
+; RV64-LABEL: selectcc_v4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a2, zero, -1
+; RV64-NEXT: bne a0, a1, .LBB37_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a2, zero
+; RV64-NEXT: .LBB37_2:
+; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; RV64-NEXT: vand.vx v26, v8, a2
+; RV64-NEXT: vmv.v.x v28, a2
+; RV64-NEXT: vxor.vi v28, v28, -1
+; RV64-NEXT: vand.vv v28, v10, v28
+; RV64-NEXT: vor.vv v8, v26, v28
+; RV64-NEXT: ret
+ %cmp = icmp ne i64 %a, %b
+ %v = select i1 %cmp, <4 x i64> %c, <4 x i64> %d
+ ret <4 x i64> %v
+}
+
+define <8 x i64> @select_v8i64(i1 zeroext %c, <8 x i64> %a, <8 x i64> %b) {
+; RV32-LABEL: select_v8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi a1, zero, -1
+; RV32-NEXT: bnez a0, .LBB38_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: .LBB38_2:
+; RV32-NEXT: vsetivli a0, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.x v28, a1
+; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; RV32-NEXT: vand.vv v8, v8, v28
+; RV32-NEXT: vsetivli a0, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v16, -1
+; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; RV32-NEXT: vxor.vv v28, v28, v16
+; RV32-NEXT: vand.vv v28, v12, v28
+; RV32-NEXT: vor.vv v8, v8, v28
+; RV32-NEXT: ret
+;
+; RV64-LABEL: select_v8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: bnez a0, .LBB38_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, zero
+; RV64-NEXT: .LBB38_2:
+; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; RV64-NEXT: vand.vx v28, v8, a1
+; RV64-NEXT: vmv.v.x v8, a1
+; RV64-NEXT: vxor.vi v8, v8, -1
+; RV64-NEXT: vand.vv v8, v12, v8
+; RV64-NEXT: vor.vv v8, v28, v8
+; RV64-NEXT: ret
+ %v = select i1 %c, <8 x i64> %a, <8 x i64> %b
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @selectcc_v8i64(i64 signext %a, i64 signext %b, <8 x i64> %c, <8 x i64> %d) {
+; RV32-LABEL: selectcc_v8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: xor a1, a1, a3
+; RV32-NEXT: xor a0, a0, a2
+; RV32-NEXT: or a1, a0, a1
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: bnez a1, .LBB39_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, zero
+; RV32-NEXT: .LBB39_2:
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.x v28, a0
+; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; RV32-NEXT: vand.vv v8, v8, v28
+; RV32-NEXT: vsetivli a0, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v16, -1
+; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; RV32-NEXT: vxor.vv v28, v28, v16
+; RV32-NEXT: vand.vv v28, v12, v28
+; RV32-NEXT: vor.vv v8, v8, v28
+; RV32-NEXT: ret
+;
+; RV64-LABEL: selectcc_v8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a2, zero, -1
+; RV64-NEXT: bne a0, a1, .LBB39_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a2, zero
+; RV64-NEXT: .LBB39_2:
+; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; RV64-NEXT: vand.vx v28, v8, a2
+; RV64-NEXT: vmv.v.x v8, a2
+; RV64-NEXT: vxor.vi v8, v8, -1
+; RV64-NEXT: vand.vv v8, v12, v8
+; RV64-NEXT: vor.vv v8, v28, v8
+; RV64-NEXT: ret
+ %cmp = icmp ne i64 %a, %b
+ %v = select i1 %cmp, <8 x i64> %c, <8 x i64> %d
+ ret <8 x i64> %v
+}
+
+define <16 x i64> @select_v16i64(i1 zeroext %c, <16 x i64> %a, <16 x i64> %b) {
+; RV32-LABEL: select_v16i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi a1, zero, -1
+; RV32-NEXT: bnez a0, .LBB40_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: .LBB40_2:
+; RV32-NEXT: addi a0, zero, 32
+; RV32-NEXT: vsetvli a2, a0, e32,m8,ta,mu
+; RV32-NEXT: vmv.v.x v24, a1
+; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu
+; RV32-NEXT: vand.vv v8, v8, v24
+; RV32-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; RV32-NEXT: vmv.v.i v0, -1
+; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu
+; RV32-NEXT: vxor.vv v24, v24, v0
+; RV32-NEXT: vand.vv v16, v16, v24
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: select_v16i64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: bnez a0, .LBB40_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, zero
+; RV64-NEXT: .LBB40_2:
+; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu
+; RV64-NEXT: vand.vx v8, v8, a1
+; RV64-NEXT: vmv.v.x v24, a1
+; RV64-NEXT: vxor.vi v24, v24, -1
+; RV64-NEXT: vand.vv v16, v16, v24
+; RV64-NEXT: vor.vv v8, v8, v16
+; RV64-NEXT: ret
+ %v = select i1 %c, <16 x i64> %a, <16 x i64> %b
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @selectcc_v16i64(i64 signext %a, i64 signext %b, <16 x i64> %c, <16 x i64> %d) {
+; RV32-LABEL: selectcc_v16i64:
+; RV32: # %bb.0:
+; RV32-NEXT: xor a1, a1, a3
+; RV32-NEXT: xor a0, a0, a2
+; RV32-NEXT: or a1, a0, a1
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: bnez a1, .LBB41_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, zero
+; RV32-NEXT: .LBB41_2:
+; RV32-NEXT: addi a1, zero, 32
+; RV32-NEXT: vsetvli a2, a1, e32,m8,ta,mu
+; RV32-NEXT: vmv.v.x v24, a0
+; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu
+; RV32-NEXT: vand.vv v8, v8, v24
+; RV32-NEXT: vsetvli a0, a1, e32,m8,ta,mu
+; RV32-NEXT: vmv.v.i v0, -1
+; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu
+; RV32-NEXT: vxor.vv v24, v24, v0
+; RV32-NEXT: vand.vv v16, v16, v24
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: selectcc_v16i64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a2, zero, -1
+; RV64-NEXT: bne a0, a1, .LBB41_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a2, zero
+; RV64-NEXT: .LBB41_2:
+; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu
+; RV64-NEXT: vand.vx v8, v8, a2
+; RV64-NEXT: vmv.v.x v24, a2
+; RV64-NEXT: vxor.vi v24, v24, -1
+; RV64-NEXT: vand.vv v16, v16, v24
+; RV64-NEXT: vor.vv v8, v8, v16
+; RV64-NEXT: ret
+ %cmp = icmp ne i64 %a, %b
+ %v = select i1 %cmp, <16 x i64> %c, <16 x i64> %d
+ ret <16 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/select-fp.ll
new file mode 100644
index 0000000000000..1c1b8cc93269d
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/select-fp.ll
@@ -0,0 +1,777 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+define <vscale x 1 x half> @select_nxv1f16(i1 zeroext %c, <vscale x 1 x half> %a, <vscale x 1 x half> %b) {
+; CHECK-LABEL: select_nxv1f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB0_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a1
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 1 x half> %a, <vscale x 1 x half> %b
+ ret <vscale x 1 x half> %v
+}
+
+define <vscale x 1 x half> @selectcc_nxv1f16(half %a, half %b, <vscale x 1 x half> %c, <vscale x 1 x half> %d) {
+; CHECK-LABEL: selectcc_nxv1f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: feq.h a1, fa0, fa1
+; CHECK-NEXT: addi a0, zero, -1
+; CHECK-NEXT: bnez a1, .LBB1_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, zero
+; CHECK-NEXT: .LBB1_2:
+; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a0
+; CHECK-NEXT: vmv.v.x v26, a0
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq half %a, %b
+ %v = select i1 %cmp, <vscale x 1 x half> %c, <vscale x 1 x half> %d
+ ret <vscale x 1 x half> %v
+}
+
+define <vscale x 2 x half> @select_nxv2f16(i1 zeroext %c, <vscale x 2 x half> %a, <vscale x 2 x half> %b) {
+; CHECK-LABEL: select_nxv2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB2_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB2_2:
+; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a1
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 2 x half> %a, <vscale x 2 x half> %b
+ ret <vscale x 2 x half> %v
+}
+
+define <vscale x 2 x half> @selectcc_nxv2f16(half %a, half %b, <vscale x 2 x half> %c, <vscale x 2 x half> %d) {
+; CHECK-LABEL: selectcc_nxv2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: feq.h a1, fa0, fa1
+; CHECK-NEXT: addi a0, zero, -1
+; CHECK-NEXT: bnez a1, .LBB3_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, zero
+; CHECK-NEXT: .LBB3_2:
+; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a0
+; CHECK-NEXT: vmv.v.x v26, a0
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq half %a, %b
+ %v = select i1 %cmp, <vscale x 2 x half> %c, <vscale x 2 x half> %d
+ ret <vscale x 2 x half> %v
+}
+
+define <vscale x 4 x half> @select_nxv4f16(i1 zeroext %c, <vscale x 4 x half> %a, <vscale x 4 x half> %b) {
+; CHECK-LABEL: select_nxv4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB4_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB4_2:
+; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a1
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 4 x half> %a, <vscale x 4 x half> %b
+ ret <vscale x 4 x half> %v
+}
+
+define <vscale x 4 x half> @selectcc_nxv4f16(half %a, half %b, <vscale x 4 x half> %c, <vscale x 4 x half> %d) {
+; CHECK-LABEL: selectcc_nxv4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: feq.h a1, fa0, fa1
+; CHECK-NEXT: addi a0, zero, -1
+; CHECK-NEXT: bnez a1, .LBB5_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, zero
+; CHECK-NEXT: .LBB5_2:
+; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a0
+; CHECK-NEXT: vmv.v.x v26, a0
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq half %a, %b
+ %v = select i1 %cmp, <vscale x 4 x half> %c, <vscale x 4 x half> %d
+ ret <vscale x 4 x half> %v
+}
+
+define <vscale x 8 x half> @select_nxv8f16(i1 zeroext %c, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: select_nxv8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB6_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB6_2:
+; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
+; CHECK-NEXT: vand.vx v26, v8, a1
+; CHECK-NEXT: vmv.v.x v28, a1
+; CHECK-NEXT: vxor.vi v28, v28, -1
+; CHECK-NEXT: vand.vv v28, v10, v28
+; CHECK-NEXT: vor.vv v8, v26, v28
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 8 x half> %a, <vscale x 8 x half> %b
+ ret <vscale x 8 x half> %v
+}
+
+define <vscale x 8 x half> @selectcc_nxv8f16(half %a, half %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d) {
+; CHECK-LABEL: selectcc_nxv8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: feq.h a1, fa0, fa1
+; CHECK-NEXT: addi a0, zero, -1
+; CHECK-NEXT: bnez a1, .LBB7_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, zero
+; CHECK-NEXT: .LBB7_2:
+; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu
+; CHECK-NEXT: vand.vx v26, v8, a0
+; CHECK-NEXT: vmv.v.x v28, a0
+; CHECK-NEXT: vxor.vi v28, v28, -1
+; CHECK-NEXT: vand.vv v28, v10, v28
+; CHECK-NEXT: vor.vv v8, v26, v28
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq half %a, %b
+ %v = select i1 %cmp, <vscale x 8 x half> %c, <vscale x 8 x half> %d
+ ret <vscale x 8 x half> %v
+}
+
+define <vscale x 16 x half> @select_nxv16f16(i1 zeroext %c, <vscale x 16 x half> %a, <vscale x 16 x half> %b) {
+; CHECK-LABEL: select_nxv16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB8_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB8_2:
+; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu
+; CHECK-NEXT: vand.vx v28, v8, a1
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: vxor.vi v8, v8, -1
+; CHECK-NEXT: vand.vv v8, v12, v8
+; CHECK-NEXT: vor.vv v8, v28, v8
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 16 x half> %a, <vscale x 16 x half> %b
+ ret <vscale x 16 x half> %v
+}
+
+define <vscale x 16 x half> @selectcc_nxv16f16(half %a, half %b, <vscale x 16 x half> %c, <vscale x 16 x half> %d) {
+; CHECK-LABEL: selectcc_nxv16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: feq.h a1, fa0, fa1
+; CHECK-NEXT: addi a0, zero, -1
+; CHECK-NEXT: bnez a1, .LBB9_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, zero
+; CHECK-NEXT: .LBB9_2:
+; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu
+; CHECK-NEXT: vand.vx v28, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vxor.vi v8, v8, -1
+; CHECK-NEXT: vand.vv v8, v12, v8
+; CHECK-NEXT: vor.vv v8, v28, v8
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq half %a, %b
+ %v = select i1 %cmp, <vscale x 16 x half> %c, <vscale x 16 x half> %d
+ ret <vscale x 16 x half> %v
+}
+
+define <vscale x 32 x half> @select_nxv32f16(i1 zeroext %c, <vscale x 32 x half> %a, <vscale x 32 x half> %b) {
+; CHECK-LABEL: select_nxv32f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB10_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB10_2:
+; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: vmv.v.x v24, a1
+; CHECK-NEXT: vxor.vi v24, v24, -1
+; CHECK-NEXT: vand.vv v16, v16, v24
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 32 x half> %a, <vscale x 32 x half> %b
+ ret <vscale x 32 x half> %v
+}
+
+define <vscale x 32 x half> @selectcc_nxv32f16(half %a, half %b, <vscale x 32 x half> %c, <vscale x 32 x half> %d) {
+; CHECK-LABEL: selectcc_nxv32f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: feq.h a1, fa0, fa1
+; CHECK-NEXT: addi a0, zero, -1
+; CHECK-NEXT: bnez a1, .LBB11_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, zero
+; CHECK-NEXT: .LBB11_2:
+; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v24, a0
+; CHECK-NEXT: vxor.vi v24, v24, -1
+; CHECK-NEXT: vand.vv v16, v16, v24
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq half %a, %b
+ %v = select i1 %cmp, <vscale x 32 x half> %c, <vscale x 32 x half> %d
+ ret <vscale x 32 x half> %v
+}
+
+define <vscale x 1 x float> @select_nxv1f32(i1 zeroext %c, <vscale x 1 x float> %a, <vscale x 1 x float> %b) {
+; CHECK-LABEL: select_nxv1f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB12_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB12_2:
+; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a1
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 1 x float> %a, <vscale x 1 x float> %b
+ ret <vscale x 1 x float> %v
+}
+
+define <vscale x 1 x float> @selectcc_nxv1f32(float %a, float %b, <vscale x 1 x float> %c, <vscale x 1 x float> %d) {
+; CHECK-LABEL: selectcc_nxv1f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: feq.s a1, fa0, fa1
+; CHECK-NEXT: addi a0, zero, -1
+; CHECK-NEXT: bnez a1, .LBB13_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, zero
+; CHECK-NEXT: .LBB13_2:
+; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a0
+; CHECK-NEXT: vmv.v.x v26, a0
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq float %a, %b
+ %v = select i1 %cmp, <vscale x 1 x float> %c, <vscale x 1 x float> %d
+ ret <vscale x 1 x float> %v
+}
+
+define <vscale x 2 x float> @select_nxv2f32(i1 zeroext %c, <vscale x 2 x float> %a, <vscale x 2 x float> %b) {
+; CHECK-LABEL: select_nxv2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB14_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB14_2:
+; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a1
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 2 x float> %a, <vscale x 2 x float> %b
+ ret <vscale x 2 x float> %v
+}
+
+define <vscale x 2 x float> @selectcc_nxv2f32(float %a, float %b, <vscale x 2 x float> %c, <vscale x 2 x float> %d) {
+; CHECK-LABEL: selectcc_nxv2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: feq.s a1, fa0, fa1
+; CHECK-NEXT: addi a0, zero, -1
+; CHECK-NEXT: bnez a1, .LBB15_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, zero
+; CHECK-NEXT: .LBB15_2:
+; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a0
+; CHECK-NEXT: vmv.v.x v26, a0
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq float %a, %b
+ %v = select i1 %cmp, <vscale x 2 x float> %c, <vscale x 2 x float> %d
+ ret <vscale x 2 x float> %v
+}
+
+define <vscale x 4 x float> @select_nxv4f32(i1 zeroext %c, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: select_nxv4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB16_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB16_2:
+; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT: vand.vx v26, v8, a1
+; CHECK-NEXT: vmv.v.x v28, a1
+; CHECK-NEXT: vxor.vi v28, v28, -1
+; CHECK-NEXT: vand.vv v28, v10, v28
+; CHECK-NEXT: vor.vv v8, v26, v28
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 4 x float> %a, <vscale x 4 x float> %b
+ ret <vscale x 4 x float> %v
+}
+
+define <vscale x 4 x float> @selectcc_nxv4f32(float %a, float %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d) {
+; CHECK-LABEL: selectcc_nxv4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: feq.s a1, fa0, fa1
+; CHECK-NEXT: addi a0, zero, -1
+; CHECK-NEXT: bnez a1, .LBB17_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, zero
+; CHECK-NEXT: .LBB17_2:
+; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu
+; CHECK-NEXT: vand.vx v26, v8, a0
+; CHECK-NEXT: vmv.v.x v28, a0
+; CHECK-NEXT: vxor.vi v28, v28, -1
+; CHECK-NEXT: vand.vv v28, v10, v28
+; CHECK-NEXT: vor.vv v8, v26, v28
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq float %a, %b
+ %v = select i1 %cmp, <vscale x 4 x float> %c, <vscale x 4 x float> %d
+ ret <vscale x 4 x float> %v
+}
+
+define <vscale x 8 x float> @select_nxv8f32(i1 zeroext %c, <vscale x 8 x float> %a, <vscale x 8 x float> %b) {
+; CHECK-LABEL: select_nxv8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB18_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB18_2:
+; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
+; CHECK-NEXT: vand.vx v28, v8, a1
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: vxor.vi v8, v8, -1
+; CHECK-NEXT: vand.vv v8, v12, v8
+; CHECK-NEXT: vor.vv v8, v28, v8
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 8 x float> %a, <vscale x 8 x float> %b
+ ret <vscale x 8 x float> %v
+}
+
+define <vscale x 8 x float> @selectcc_nxv8f32(float %a, float %b, <vscale x 8 x float> %c, <vscale x 8 x float> %d) {
+; CHECK-LABEL: selectcc_nxv8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: feq.s a1, fa0, fa1
+; CHECK-NEXT: addi a0, zero, -1
+; CHECK-NEXT: bnez a1, .LBB19_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, zero
+; CHECK-NEXT: .LBB19_2:
+; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; CHECK-NEXT: vand.vx v28, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vxor.vi v8, v8, -1
+; CHECK-NEXT: vand.vv v8, v12, v8
+; CHECK-NEXT: vor.vv v8, v28, v8
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq float %a, %b
+ %v = select i1 %cmp, <vscale x 8 x float> %c, <vscale x 8 x float> %d
+ ret <vscale x 8 x float> %v
+}
+
+define <vscale x 16 x float> @select_nxv16f32(i1 zeroext %c, <vscale x 16 x float> %a, <vscale x 16 x float> %b) {
+; CHECK-LABEL: select_nxv16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB20_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB20_2:
+; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: vmv.v.x v24, a1
+; CHECK-NEXT: vxor.vi v24, v24, -1
+; CHECK-NEXT: vand.vv v16, v16, v24
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 16 x float> %a, <vscale x 16 x float> %b
+ ret <vscale x 16 x float> %v
+}
+
+define <vscale x 16 x float> @selectcc_nxv16f32(float %a, float %b, <vscale x 16 x float> %c, <vscale x 16 x float> %d) {
+; CHECK-LABEL: selectcc_nxv16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: feq.s a1, fa0, fa1
+; CHECK-NEXT: addi a0, zero, -1
+; CHECK-NEXT: bnez a1, .LBB21_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, zero
+; CHECK-NEXT: .LBB21_2:
+; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v24, a0
+; CHECK-NEXT: vxor.vi v24, v24, -1
+; CHECK-NEXT: vand.vv v16, v16, v24
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq float %a, %b
+ %v = select i1 %cmp, <vscale x 16 x float> %c, <vscale x 16 x float> %d
+ ret <vscale x 16 x float> %v
+}
+
+define <vscale x 1 x double> @select_nxv1f64(i1 zeroext %c, <vscale x 1 x double> %a, <vscale x 1 x double> %b) {
+; RV32-LABEL: select_nxv1f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: addi a1, zero, -1
+; RV32-NEXT: bnez a0, .LBB22_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: .LBB22_2:
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
+; RV32-NEXT: vand.vv v26, v8, v25
+; RV32-NEXT: vxor.vi v25, v25, -1
+; RV32-NEXT: vand.vv v25, v9, v25
+; RV32-NEXT: vor.vv v8, v26, v25
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: select_nxv1f64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: bnez a0, .LBB22_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, zero
+; RV64-NEXT: .LBB22_2:
+; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV64-NEXT: vand.vx v25, v8, a1
+; RV64-NEXT: vmv.v.x v26, a1
+; RV64-NEXT: vxor.vi v26, v26, -1
+; RV64-NEXT: vand.vv v26, v9, v26
+; RV64-NEXT: vor.vv v8, v25, v26
+; RV64-NEXT: ret
+ %v = select i1 %c, <vscale x 1 x double> %a, <vscale x 1 x double> %b
+ ret <vscale x 1 x double> %v
+}
+
+define <vscale x 1 x double> @selectcc_nxv1f64(double %a, double %b, <vscale x 1 x double> %c, <vscale x 1 x double> %d) {
+; RV32-LABEL: selectcc_nxv1f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: feq.d a1, fa0, fa1
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: bnez a1, .LBB23_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, zero
+; RV32-NEXT: .LBB23_2:
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
+; RV32-NEXT: vand.vv v26, v8, v25
+; RV32-NEXT: vxor.vi v25, v25, -1
+; RV32-NEXT: vand.vv v25, v9, v25
+; RV32-NEXT: vor.vv v8, v26, v25
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: selectcc_nxv1f64:
+; RV64: # %bb.0:
+; RV64-NEXT: feq.d a1, fa0, fa1
+; RV64-NEXT: addi a0, zero, -1
+; RV64-NEXT: bnez a1, .LBB23_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a0, zero
+; RV64-NEXT: .LBB23_2:
+; RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu
+; RV64-NEXT: vand.vx v25, v8, a0
+; RV64-NEXT: vmv.v.x v26, a0
+; RV64-NEXT: vxor.vi v26, v26, -1
+; RV64-NEXT: vand.vv v26, v9, v26
+; RV64-NEXT: vor.vv v8, v25, v26
+; RV64-NEXT: ret
+ %cmp = fcmp oeq double %a, %b
+ %v = select i1 %cmp, <vscale x 1 x double> %c, <vscale x 1 x double> %d
+ ret <vscale x 1 x double> %v
+}
+
+define <vscale x 2 x double> @select_nxv2f64(i1 zeroext %c, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; RV32-LABEL: select_nxv2f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: addi a1, zero, -1
+; RV32-NEXT: bnez a0, .LBB24_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: .LBB24_2:
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vand.vv v28, v8, v26
+; RV32-NEXT: vxor.vi v26, v26, -1
+; RV32-NEXT: vand.vv v26, v10, v26
+; RV32-NEXT: vor.vv v8, v28, v26
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: select_nxv2f64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: bnez a0, .LBB24_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, zero
+; RV64-NEXT: .LBB24_2:
+; RV64-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; RV64-NEXT: vand.vx v26, v8, a1
+; RV64-NEXT: vmv.v.x v28, a1
+; RV64-NEXT: vxor.vi v28, v28, -1
+; RV64-NEXT: vand.vv v28, v10, v28
+; RV64-NEXT: vor.vv v8, v26, v28
+; RV64-NEXT: ret
+ %v = select i1 %c, <vscale x 2 x double> %a, <vscale x 2 x double> %b
+ ret <vscale x 2 x double> %v
+}
+
+define <vscale x 2 x double> @selectcc_nxv2f64(double %a, double %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d) {
+; RV32-LABEL: selectcc_nxv2f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: feq.d a1, fa0, fa1
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: bnez a1, .LBB25_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, zero
+; RV32-NEXT: .LBB25_2:
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vand.vv v28, v8, v26
+; RV32-NEXT: vxor.vi v26, v26, -1
+; RV32-NEXT: vand.vv v26, v10, v26
+; RV32-NEXT: vor.vv v8, v28, v26
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: selectcc_nxv2f64:
+; RV64: # %bb.0:
+; RV64-NEXT: feq.d a1, fa0, fa1
+; RV64-NEXT: addi a0, zero, -1
+; RV64-NEXT: bnez a1, .LBB25_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a0, zero
+; RV64-NEXT: .LBB25_2:
+; RV64-NEXT: vsetvli a1, zero, e64,m2,ta,mu
+; RV64-NEXT: vand.vx v26, v8, a0
+; RV64-NEXT: vmv.v.x v28, a0
+; RV64-NEXT: vxor.vi v28, v28, -1
+; RV64-NEXT: vand.vv v28, v10, v28
+; RV64-NEXT: vor.vv v8, v26, v28
+; RV64-NEXT: ret
+ %cmp = fcmp oeq double %a, %b
+ %v = select i1 %cmp, <vscale x 2 x double> %c, <vscale x 2 x double> %d
+ ret <vscale x 2 x double> %v
+}
+
+define <vscale x 4 x double> @select_nxv4f64(i1 zeroext %c, <vscale x 4 x double> %a, <vscale x 4 x double> %b) {
+; RV32-LABEL: select_nxv4f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: addi a1, zero, -1
+; RV32-NEXT: bnez a0, .LBB26_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: .LBB26_2:
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v28, (a0), zero
+; RV32-NEXT: vand.vv v8, v8, v28
+; RV32-NEXT: vxor.vi v28, v28, -1
+; RV32-NEXT: vand.vv v28, v12, v28
+; RV32-NEXT: vor.vv v8, v8, v28
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: select_nxv4f64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: bnez a0, .LBB26_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, zero
+; RV64-NEXT: .LBB26_2:
+; RV64-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; RV64-NEXT: vand.vx v28, v8, a1
+; RV64-NEXT: vmv.v.x v8, a1
+; RV64-NEXT: vxor.vi v8, v8, -1
+; RV64-NEXT: vand.vv v8, v12, v8
+; RV64-NEXT: vor.vv v8, v28, v8
+; RV64-NEXT: ret
+ %v = select i1 %c, <vscale x 4 x double> %a, <vscale x 4 x double> %b
+ ret <vscale x 4 x double> %v
+}
+
+define <vscale x 4 x double> @selectcc_nxv4f64(double %a, double %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
+; RV32-LABEL: selectcc_nxv4f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: feq.d a1, fa0, fa1
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: bnez a1, .LBB27_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, zero
+; RV32-NEXT: .LBB27_2:
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v28, (a0), zero
+; RV32-NEXT: vand.vv v8, v8, v28
+; RV32-NEXT: vxor.vi v28, v28, -1
+; RV32-NEXT: vand.vv v28, v12, v28
+; RV32-NEXT: vor.vv v8, v8, v28
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: selectcc_nxv4f64:
+; RV64: # %bb.0:
+; RV64-NEXT: feq.d a1, fa0, fa1
+; RV64-NEXT: addi a0, zero, -1
+; RV64-NEXT: bnez a1, .LBB27_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a0, zero
+; RV64-NEXT: .LBB27_2:
+; RV64-NEXT: vsetvli a1, zero, e64,m4,ta,mu
+; RV64-NEXT: vand.vx v28, v8, a0
+; RV64-NEXT: vmv.v.x v8, a0
+; RV64-NEXT: vxor.vi v8, v8, -1
+; RV64-NEXT: vand.vv v8, v12, v8
+; RV64-NEXT: vor.vv v8, v28, v8
+; RV64-NEXT: ret
+ %cmp = fcmp oeq double %a, %b
+ %v = select i1 %cmp, <vscale x 4 x double> %c, <vscale x 4 x double> %d
+ ret <vscale x 4 x double> %v
+}
+
+define <vscale x 8 x double> @select_nxv8f64(i1 zeroext %c, <vscale x 8 x double> %a, <vscale x 8 x double> %b) {
+; RV32-LABEL: select_nxv8f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: addi a1, zero, -1
+; RV32-NEXT: bnez a0, .LBB28_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: .LBB28_2:
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v24, (a0), zero
+; RV32-NEXT: vand.vv v8, v8, v24
+; RV32-NEXT: vxor.vi v24, v24, -1
+; RV32-NEXT: vand.vv v16, v16, v24
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: select_nxv8f64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: bnez a0, .LBB28_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, zero
+; RV64-NEXT: .LBB28_2:
+; RV64-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; RV64-NEXT: vand.vx v8, v8, a1
+; RV64-NEXT: vmv.v.x v24, a1
+; RV64-NEXT: vxor.vi v24, v24, -1
+; RV64-NEXT: vand.vv v16, v16, v24
+; RV64-NEXT: vor.vv v8, v8, v16
+; RV64-NEXT: ret
+ %v = select i1 %c, <vscale x 8 x double> %a, <vscale x 8 x double> %b
+ ret <vscale x 8 x double> %v
+}
+
+define <vscale x 8 x double> @selectcc_nxv8f64(double %a, double %b, <vscale x 8 x double> %c, <vscale x 8 x double> %d) {
+; RV32-LABEL: selectcc_nxv8f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: feq.d a1, fa0, fa1
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: bnez a1, .LBB29_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, zero
+; RV32-NEXT: .LBB29_2:
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v24, (a0), zero
+; RV32-NEXT: vand.vv v8, v8, v24
+; RV32-NEXT: vxor.vi v24, v24, -1
+; RV32-NEXT: vand.vv v16, v16, v24
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: selectcc_nxv8f64:
+; RV64: # %bb.0:
+; RV64-NEXT: feq.d a1, fa0, fa1
+; RV64-NEXT: addi a0, zero, -1
+; RV64-NEXT: bnez a1, .LBB29_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a0, zero
+; RV64-NEXT: .LBB29_2:
+; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT: vand.vx v8, v8, a0
+; RV64-NEXT: vmv.v.x v24, a0
+; RV64-NEXT: vxor.vi v24, v24, -1
+; RV64-NEXT: vand.vv v16, v16, v24
+; RV64-NEXT: vor.vv v8, v8, v16
+; RV64-NEXT: ret
+ %cmp = fcmp oeq double %a, %b
+ %v = select i1 %cmp, <vscale x 8 x double> %c, <vscale x 8 x double> %d
+ ret <vscale x 8 x double> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/select-int.ll b/llvm/test/CodeGen/RISCV/rvv/select-int.ll
new file mode 100644
index 0000000000000..89c20e85d6faf
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/select-int.ll
@@ -0,0 +1,1330 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
+define <vscale x 1 x i1> @select_nxv1i1(i1 zeroext %c, <vscale x 1 x i1> %a, <vscale x 1 x i1> %b) {
+; CHECK-LABEL: select_nxv1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 1
+; CHECK-NEXT: bnez a0, .LBB0_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a1
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vmandnot.mm v25, v8, v26
+; CHECK-NEXT: vmand.mm v26, v0, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 1 x i1> %a, <vscale x 1 x i1> %b
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @selectcc_nxv1i1(i1 signext %a, i1 signext %b, <vscale x 1 x i1> %c, <vscale x 1 x i1> %d) {
+; CHECK-LABEL: selectcc_nxv1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
+; CHECK-NEXT: andi a1, a0, 1
+; CHECK-NEXT: addi a0, zero, 1
+; CHECK-NEXT: bnez a1, .LBB1_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, zero
+; CHECK-NEXT: .LBB1_2:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a0
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vmandnot.mm v25, v8, v26
+; CHECK-NEXT: vmand.mm v26, v0, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
+; CHECK-NEXT: ret
+ %cmp = icmp ne i1 %a, %b
+ %v = select i1 %cmp, <vscale x 1 x i1> %c, <vscale x 1 x i1> %d
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 2 x i1> @select_nxv2i1(i1 zeroext %c, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
+; CHECK-LABEL: select_nxv2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 1
+; CHECK-NEXT: bnez a0, .LBB2_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB2_2:
+; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a1
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vmandnot.mm v25, v8, v26
+; CHECK-NEXT: vmand.mm v26, v0, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b
+ ret <vscale x 2 x i1> %v
+}
+
+define <vscale x 2 x i1> @selectcc_nxv2i1(i1 signext %a, i1 signext %b, <vscale x 2 x i1> %c, <vscale x 2 x i1> %d) {
+; CHECK-LABEL: selectcc_nxv2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
+; CHECK-NEXT: andi a1, a0, 1
+; CHECK-NEXT: addi a0, zero, 1
+; CHECK-NEXT: bnez a1, .LBB3_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, zero
+; CHECK-NEXT: .LBB3_2:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a0
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vmandnot.mm v25, v8, v26
+; CHECK-NEXT: vmand.mm v26, v0, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
+; CHECK-NEXT: ret
+ %cmp = icmp ne i1 %a, %b
+ %v = select i1 %cmp, <vscale x 2 x i1> %c, <vscale x 2 x i1> %d
+ ret <vscale x 2 x i1> %v
+}
+
+define <vscale x 4 x i1> @select_nxv4i1(i1 zeroext %c, <vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: select_nxv4i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 1
+; CHECK-NEXT: bnez a0, .LBB4_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB4_2:
+; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a1
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vmandnot.mm v25, v8, v26
+; CHECK-NEXT: vmand.mm v26, v0, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 4 x i1> %a, <vscale x 4 x i1> %b
+ ret <vscale x 4 x i1> %v
+}
+
+define <vscale x 4 x i1> @selectcc_nxv4i1(i1 signext %a, i1 signext %b, <vscale x 4 x i1> %c, <vscale x 4 x i1> %d) {
+; CHECK-LABEL: selectcc_nxv4i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
+; CHECK-NEXT: andi a1, a0, 1
+; CHECK-NEXT: addi a0, zero, 1
+; CHECK-NEXT: bnez a1, .LBB5_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, zero
+; CHECK-NEXT: .LBB5_2:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a0
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vmandnot.mm v25, v8, v26
+; CHECK-NEXT: vmand.mm v26, v0, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
+; CHECK-NEXT: ret
+ %cmp = icmp ne i1 %a, %b
+ %v = select i1 %cmp, <vscale x 4 x i1> %c, <vscale x 4 x i1> %d
+ ret <vscale x 4 x i1> %v
+}
+
+define <vscale x 8 x i1> @select_nxv8i1(i1 zeroext %c, <vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: select_nxv8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 1
+; CHECK-NEXT: bnez a0, .LBB6_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB6_2:
+; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a1
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vmandnot.mm v25, v8, v26
+; CHECK-NEXT: vmand.mm v26, v0, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 8 x i1> %a, <vscale x 8 x i1> %b
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @selectcc_nxv8i1(i1 signext %a, i1 signext %b, <vscale x 8 x i1> %c, <vscale x 8 x i1> %d) {
+; CHECK-LABEL: selectcc_nxv8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
+; CHECK-NEXT: andi a1, a0, 1
+; CHECK-NEXT: addi a0, zero, 1
+; CHECK-NEXT: bnez a1, .LBB7_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, zero
+; CHECK-NEXT: .LBB7_2:
+; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a0
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vmandnot.mm v25, v8, v26
+; CHECK-NEXT: vmand.mm v26, v0, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
+; CHECK-NEXT: ret
+ %cmp = icmp ne i1 %a, %b
+ %v = select i1 %cmp, <vscale x 8 x i1> %c, <vscale x 8 x i1> %d
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 16 x i1> @select_nxv16i1(i1 zeroext %c, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: select_nxv16i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 1
+; CHECK-NEXT: bnez a0, .LBB8_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB8_2:
+; CHECK-NEXT: vsetvli a0, zero, e8,m2,ta,mu
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vmsne.vi v25, v26, 0
+; CHECK-NEXT: vmandnot.mm v26, v8, v25
+; CHECK-NEXT: vmand.mm v25, v0, v25
+; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b
+ ret <vscale x 16 x i1> %v
+}
+
+define <vscale x 16 x i1> @selectcc_nxv16i1(i1 signext %a, i1 signext %b, <vscale x 16 x i1> %c, <vscale x 16 x i1> %d) {
+; CHECK-LABEL: selectcc_nxv16i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
+; CHECK-NEXT: andi a1, a0, 1
+; CHECK-NEXT: addi a0, zero, 1
+; CHECK-NEXT: bnez a1, .LBB9_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, zero
+; CHECK-NEXT: .LBB9_2:
+; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu
+; CHECK-NEXT: vmv.v.x v26, a0
+; CHECK-NEXT: vmsne.vi v25, v26, 0
+; CHECK-NEXT: vmandnot.mm v26, v8, v25
+; CHECK-NEXT: vmand.mm v25, v0, v25
+; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: ret
+ %cmp = icmp ne i1 %a, %b
+ %v = select i1 %cmp, <vscale x 16 x i1> %c, <vscale x 16 x i1> %d
+ ret <vscale x 16 x i1> %v
+}
+
+define <vscale x 32 x i1> @select_nxv32i1(i1 zeroext %c, <vscale x 32 x i1> %a, <vscale x 32 x i1> %b) {
+; CHECK-LABEL: select_nxv32i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 1
+; CHECK-NEXT: bnez a0, .LBB10_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB10_2:
+; CHECK-NEXT: vsetvli a0, zero, e8,m4,ta,mu
+; CHECK-NEXT: vmv.v.x v28, a1
+; CHECK-NEXT: vmsne.vi v25, v28, 0
+; CHECK-NEXT: vmandnot.mm v26, v8, v25
+; CHECK-NEXT: vmand.mm v25, v0, v25
+; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 32 x i1> %a, <vscale x 32 x i1> %b
+ ret <vscale x 32 x i1> %v
+}
+
+define <vscale x 32 x i1> @selectcc_nxv32i1(i1 signext %a, i1 signext %b, <vscale x 32 x i1> %c, <vscale x 32 x i1> %d) {
+; CHECK-LABEL: selectcc_nxv32i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
+; CHECK-NEXT: andi a1, a0, 1
+; CHECK-NEXT: addi a0, zero, 1
+; CHECK-NEXT: bnez a1, .LBB11_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, zero
+; CHECK-NEXT: .LBB11_2:
+; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu
+; CHECK-NEXT: vmv.v.x v28, a0
+; CHECK-NEXT: vmsne.vi v25, v28, 0
+; CHECK-NEXT: vmandnot.mm v26, v8, v25
+; CHECK-NEXT: vmand.mm v25, v0, v25
+; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: ret
+ %cmp = icmp ne i1 %a, %b
+ %v = select i1 %cmp, <vscale x 32 x i1> %c, <vscale x 32 x i1> %d
+ ret <vscale x 32 x i1> %v
+}
+
+define <vscale x 64 x i1> @select_nxv64i1(i1 zeroext %c, <vscale x 64 x i1> %a, <vscale x 64 x i1> %b) {
+; CHECK-LABEL: select_nxv64i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 1
+; CHECK-NEXT: bnez a0, .LBB12_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB12_2:
+; CHECK-NEXT: vsetvli a0, zero, e8,m8,ta,mu
+; CHECK-NEXT: vmv.v.x v16, a1
+; CHECK-NEXT: vmsne.vi v25, v16, 0
+; CHECK-NEXT: vmandnot.mm v26, v8, v25
+; CHECK-NEXT: vmand.mm v25, v0, v25
+; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 64 x i1> %a, <vscale x 64 x i1> %b
+ ret <vscale x 64 x i1> %v
+}
+
+define <vscale x 64 x i1> @selectcc_nxv64i1(i1 signext %a, i1 signext %b, <vscale x 64 x i1> %c, <vscale x 64 x i1> %d) {
+; CHECK-LABEL: selectcc_nxv64i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
+; CHECK-NEXT: andi a1, a0, 1
+; CHECK-NEXT: addi a0, zero, 1
+; CHECK-NEXT: bnez a1, .LBB13_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, zero
+; CHECK-NEXT: .LBB13_2:
+; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu
+; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: vmsne.vi v25, v16, 0
+; CHECK-NEXT: vmandnot.mm v26, v8, v25
+; CHECK-NEXT: vmand.mm v25, v0, v25
+; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: ret
+ %cmp = icmp ne i1 %a, %b
+ %v = select i1 %cmp, <vscale x 64 x i1> %c, <vscale x 64 x i1> %d
+ ret <vscale x 64 x i1> %v
+}
+
+define <vscale x 1 x i8> @select_nxv1i8(i1 zeroext %c, <vscale x 1 x i8> %a, <vscale x 1 x i8> %b) {
+; CHECK-LABEL: select_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB14_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB14_2:
+; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a1
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 1 x i8> %a, <vscale x 1 x i8> %b
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @selectcc_nxv1i8(i8 signext %a, i8 signext %b, <vscale x 1 x i8> %c, <vscale x 1 x i8> %d) {
+; CHECK-LABEL: selectcc_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB15_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB15_2:
+; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a2
+; CHECK-NEXT: vmv.v.x v26, a2
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %cmp = icmp ne i8 %a, %b
+ %v = select i1 %cmp, <vscale x 1 x i8> %c, <vscale x 1 x i8> %d
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 2 x i8> @select_nxv2i8(i1 zeroext %c, <vscale x 2 x i8> %a, <vscale x 2 x i8> %b) {
+; CHECK-LABEL: select_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB16_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB16_2:
+; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a1
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 2 x i8> %a, <vscale x 2 x i8> %b
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @selectcc_nxv2i8(i8 signext %a, i8 signext %b, <vscale x 2 x i8> %c, <vscale x 2 x i8> %d) {
+; CHECK-LABEL: selectcc_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB17_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB17_2:
+; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a2
+; CHECK-NEXT: vmv.v.x v26, a2
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %cmp = icmp ne i8 %a, %b
+ %v = select i1 %cmp, <vscale x 2 x i8> %c, <vscale x 2 x i8> %d
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 4 x i8> @select_nxv4i8(i1 zeroext %c, <vscale x 4 x i8> %a, <vscale x 4 x i8> %b) {
+; CHECK-LABEL: select_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB18_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB18_2:
+; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a1
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 4 x i8> %a, <vscale x 4 x i8> %b
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @selectcc_nxv4i8(i8 signext %a, i8 signext %b, <vscale x 4 x i8> %c, <vscale x 4 x i8> %d) {
+; CHECK-LABEL: selectcc_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB19_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB19_2:
+; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a2
+; CHECK-NEXT: vmv.v.x v26, a2
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %cmp = icmp ne i8 %a, %b
+ %v = select i1 %cmp, <vscale x 4 x i8> %c, <vscale x 4 x i8> %d
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 8 x i8> @select_nxv8i8(i1 zeroext %c, <vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
+; CHECK-LABEL: select_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB20_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB20_2:
+; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a1
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 8 x i8> %a, <vscale x 8 x i8> %b
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @selectcc_nxv8i8(i8 signext %a, i8 signext %b, <vscale x 8 x i8> %c, <vscale x 8 x i8> %d) {
+; CHECK-LABEL: selectcc_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB21_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB21_2:
+; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a2
+; CHECK-NEXT: vmv.v.x v26, a2
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %cmp = icmp ne i8 %a, %b
+ %v = select i1 %cmp, <vscale x 8 x i8> %c, <vscale x 8 x i8> %d
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 16 x i8> @select_nxv16i8(i1 zeroext %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: select_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB22_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB22_2:
+; CHECK-NEXT: vsetvli a0, zero, e8,m2,ta,mu
+; CHECK-NEXT: vand.vx v26, v8, a1
+; CHECK-NEXT: vmv.v.x v28, a1
+; CHECK-NEXT: vxor.vi v28, v28, -1
+; CHECK-NEXT: vand.vv v28, v10, v28
+; CHECK-NEXT: vor.vv v8, v26, v28
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @selectcc_nxv16i8(i8 signext %a, i8 signext %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) {
+; CHECK-LABEL: selectcc_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB23_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB23_2:
+; CHECK-NEXT: vsetvli a0, zero, e8,m2,ta,mu
+; CHECK-NEXT: vand.vx v26, v8, a2
+; CHECK-NEXT: vmv.v.x v28, a2
+; CHECK-NEXT: vxor.vi v28, v28, -1
+; CHECK-NEXT: vand.vv v28, v10, v28
+; CHECK-NEXT: vor.vv v8, v26, v28
+; CHECK-NEXT: ret
+ %cmp = icmp ne i8 %a, %b
+ %v = select i1 %cmp, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 32 x i8> @select_nxv32i8(i1 zeroext %c, <vscale x 32 x i8> %a, <vscale x 32 x i8> %b) {
+; CHECK-LABEL: select_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB24_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB24_2:
+; CHECK-NEXT: vsetvli a0, zero, e8,m4,ta,mu
+; CHECK-NEXT: vand.vx v28, v8, a1
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: vxor.vi v8, v8, -1
+; CHECK-NEXT: vand.vv v8, v12, v8
+; CHECK-NEXT: vor.vv v8, v28, v8
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 32 x i8> %a, <vscale x 32 x i8> %b
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @selectcc_nxv32i8(i8 signext %a, i8 signext %b, <vscale x 32 x i8> %c, <vscale x 32 x i8> %d) {
+; CHECK-LABEL: selectcc_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB25_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB25_2:
+; CHECK-NEXT: vsetvli a0, zero, e8,m4,ta,mu
+; CHECK-NEXT: vand.vx v28, v8, a2
+; CHECK-NEXT: vmv.v.x v8, a2
+; CHECK-NEXT: vxor.vi v8, v8, -1
+; CHECK-NEXT: vand.vv v8, v12, v8
+; CHECK-NEXT: vor.vv v8, v28, v8
+; CHECK-NEXT: ret
+ %cmp = icmp ne i8 %a, %b
+ %v = select i1 %cmp, <vscale x 32 x i8> %c, <vscale x 32 x i8> %d
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 64 x i8> @select_nxv64i8(i1 zeroext %c, <vscale x 64 x i8> %a, <vscale x 64 x i8> %b) {
+; CHECK-LABEL: select_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB26_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB26_2:
+; CHECK-NEXT: vsetvli a0, zero, e8,m8,ta,mu
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: vmv.v.x v24, a1
+; CHECK-NEXT: vxor.vi v24, v24, -1
+; CHECK-NEXT: vand.vv v16, v16, v24
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 64 x i8> %a, <vscale x 64 x i8> %b
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @selectcc_nxv64i8(i8 signext %a, i8 signext %b, <vscale x 64 x i8> %c, <vscale x 64 x i8> %d) {
+; CHECK-LABEL: selectcc_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB27_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: vsetvli a0, zero, e8,m8,ta,mu
+; CHECK-NEXT: vand.vx v8, v8, a2
+; CHECK-NEXT: vmv.v.x v24, a2
+; CHECK-NEXT: vxor.vi v24, v24, -1
+; CHECK-NEXT: vand.vv v16, v16, v24
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %cmp = icmp ne i8 %a, %b
+ %v = select i1 %cmp, <vscale x 64 x i8> %c, <vscale x 64 x i8> %d
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 1 x i16> @select_nxv1i16(i1 zeroext %c, <vscale x 1 x i16> %a, <vscale x 1 x i16> %b) {
+; CHECK-LABEL: select_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB28_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB28_2:
+; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a1
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 1 x i16> %a, <vscale x 1 x i16> %b
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @selectcc_nxv1i16(i16 signext %a, i16 signext %b, <vscale x 1 x i16> %c, <vscale x 1 x i16> %d) {
+; CHECK-LABEL: selectcc_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB29_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB29_2:
+; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a2
+; CHECK-NEXT: vmv.v.x v26, a2
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %cmp = icmp ne i16 %a, %b
+ %v = select i1 %cmp, <vscale x 1 x i16> %c, <vscale x 1 x i16> %d
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 2 x i16> @select_nxv2i16(i1 zeroext %c, <vscale x 2 x i16> %a, <vscale x 2 x i16> %b) {
+; CHECK-LABEL: select_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB30_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB30_2:
+; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a1
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 2 x i16> %a, <vscale x 2 x i16> %b
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @selectcc_nxv2i16(i16 signext %a, i16 signext %b, <vscale x 2 x i16> %c, <vscale x 2 x i16> %d) {
+; CHECK-LABEL: selectcc_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB31_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB31_2:
+; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a2
+; CHECK-NEXT: vmv.v.x v26, a2
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %cmp = icmp ne i16 %a, %b
+ %v = select i1 %cmp, <vscale x 2 x i16> %c, <vscale x 2 x i16> %d
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 4 x i16> @select_nxv4i16(i1 zeroext %c, <vscale x 4 x i16> %a, <vscale x 4 x i16> %b) {
+; CHECK-LABEL: select_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB32_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a1
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 4 x i16> %a, <vscale x 4 x i16> %b
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @selectcc_nxv4i16(i16 signext %a, i16 signext %b, <vscale x 4 x i16> %c, <vscale x 4 x i16> %d) {
+; CHECK-LABEL: selectcc_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB33_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a2
+; CHECK-NEXT: vmv.v.x v26, a2
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %cmp = icmp ne i16 %a, %b
+ %v = select i1 %cmp, <vscale x 4 x i16> %c, <vscale x 4 x i16> %d
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 8 x i16> @select_nxv8i16(i1 zeroext %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: select_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB34_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB34_2:
+; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
+; CHECK-NEXT: vand.vx v26, v8, a1
+; CHECK-NEXT: vmv.v.x v28, a1
+; CHECK-NEXT: vxor.vi v28, v28, -1
+; CHECK-NEXT: vand.vv v28, v10, v28
+; CHECK-NEXT: vor.vv v8, v26, v28
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @selectcc_nxv8i16(i16 signext %a, i16 signext %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d) {
+; CHECK-LABEL: selectcc_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB35_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB35_2:
+; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
+; CHECK-NEXT: vand.vx v26, v8, a2
+; CHECK-NEXT: vmv.v.x v28, a2
+; CHECK-NEXT: vxor.vi v28, v28, -1
+; CHECK-NEXT: vand.vv v28, v10, v28
+; CHECK-NEXT: vor.vv v8, v26, v28
+; CHECK-NEXT: ret
+ %cmp = icmp ne i16 %a, %b
+ %v = select i1 %cmp, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 16 x i16> @select_nxv16i16(i1 zeroext %c, <vscale x 16 x i16> %a, <vscale x 16 x i16> %b) {
+; CHECK-LABEL: select_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB36_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB36_2:
+; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu
+; CHECK-NEXT: vand.vx v28, v8, a1
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: vxor.vi v8, v8, -1
+; CHECK-NEXT: vand.vv v8, v12, v8
+; CHECK-NEXT: vor.vv v8, v28, v8
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 16 x i16> %a, <vscale x 16 x i16> %b
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @selectcc_nxv16i16(i16 signext %a, i16 signext %b, <vscale x 16 x i16> %c, <vscale x 16 x i16> %d) {
+; CHECK-LABEL: selectcc_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB37_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB37_2:
+; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu
+; CHECK-NEXT: vand.vx v28, v8, a2
+; CHECK-NEXT: vmv.v.x v8, a2
+; CHECK-NEXT: vxor.vi v8, v8, -1
+; CHECK-NEXT: vand.vv v8, v12, v8
+; CHECK-NEXT: vor.vv v8, v28, v8
+; CHECK-NEXT: ret
+ %cmp = icmp ne i16 %a, %b
+ %v = select i1 %cmp, <vscale x 16 x i16> %c, <vscale x 16 x i16> %d
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 32 x i16> @select_nxv32i16(i1 zeroext %c, <vscale x 32 x i16> %a, <vscale x 32 x i16> %b) {
+; CHECK-LABEL: select_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB38_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB38_2:
+; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: vmv.v.x v24, a1
+; CHECK-NEXT: vxor.vi v24, v24, -1
+; CHECK-NEXT: vand.vv v16, v16, v24
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 32 x i16> %a, <vscale x 32 x i16> %b
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @selectcc_nxv32i16(i16 signext %a, i16 signext %b, <vscale x 32 x i16> %c, <vscale x 32 x i16> %d) {
+; CHECK-LABEL: selectcc_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB39_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB39_2:
+; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu
+; CHECK-NEXT: vand.vx v8, v8, a2
+; CHECK-NEXT: vmv.v.x v24, a2
+; CHECK-NEXT: vxor.vi v24, v24, -1
+; CHECK-NEXT: vand.vv v16, v16, v24
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %cmp = icmp ne i16 %a, %b
+ %v = select i1 %cmp, <vscale x 32 x i16> %c, <vscale x 32 x i16> %d
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 1 x i32> @select_nxv1i32(i1 zeroext %c, <vscale x 1 x i32> %a, <vscale x 1 x i32> %b) {
+; CHECK-LABEL: select_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB40_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB40_2:
+; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a1
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 1 x i32> %a, <vscale x 1 x i32> %b
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @selectcc_nxv1i32(i32 signext %a, i32 signext %b, <vscale x 1 x i32> %c, <vscale x 1 x i32> %d) {
+; CHECK-LABEL: selectcc_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB41_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB41_2:
+; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a2
+; CHECK-NEXT: vmv.v.x v26, a2
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %cmp = icmp ne i32 %a, %b
+ %v = select i1 %cmp, <vscale x 1 x i32> %c, <vscale x 1 x i32> %d
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 2 x i32> @select_nxv2i32(i1 zeroext %c, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: select_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB42_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB42_2:
+; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a1
+; CHECK-NEXT: vmv.v.x v26, a1
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @selectcc_nxv2i32(i32 signext %a, i32 signext %b, <vscale x 2 x i32> %c, <vscale x 2 x i32> %d) {
+; CHECK-LABEL: selectcc_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB43_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB43_2:
+; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT: vand.vx v25, v8, a2
+; CHECK-NEXT: vmv.v.x v26, a2
+; CHECK-NEXT: vxor.vi v26, v26, -1
+; CHECK-NEXT: vand.vv v26, v9, v26
+; CHECK-NEXT: vor.vv v8, v25, v26
+; CHECK-NEXT: ret
+ %cmp = icmp ne i32 %a, %b
+ %v = select i1 %cmp, <vscale x 2 x i32> %c, <vscale x 2 x i32> %d
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 4 x i32> @select_nxv4i32(i1 zeroext %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: select_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB44_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB44_2:
+; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT: vand.vx v26, v8, a1
+; CHECK-NEXT: vmv.v.x v28, a1
+; CHECK-NEXT: vxor.vi v28, v28, -1
+; CHECK-NEXT: vand.vv v28, v10, v28
+; CHECK-NEXT: vor.vv v8, v26, v28
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @selectcc_nxv4i32(i32 signext %a, i32 signext %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d) {
+; CHECK-LABEL: selectcc_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB45_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB45_2:
+; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT: vand.vx v26, v8, a2
+; CHECK-NEXT: vmv.v.x v28, a2
+; CHECK-NEXT: vxor.vi v28, v28, -1
+; CHECK-NEXT: vand.vv v28, v10, v28
+; CHECK-NEXT: vor.vv v8, v26, v28
+; CHECK-NEXT: ret
+ %cmp = icmp ne i32 %a, %b
+ %v = select i1 %cmp, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 8 x i32> @select_nxv8i32(i1 zeroext %c, <vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
+; CHECK-LABEL: select_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB46_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB46_2:
+; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
+; CHECK-NEXT: vand.vx v28, v8, a1
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: vxor.vi v8, v8, -1
+; CHECK-NEXT: vand.vv v8, v12, v8
+; CHECK-NEXT: vor.vv v8, v28, v8
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 8 x i32> %a, <vscale x 8 x i32> %b
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @selectcc_nxv8i32(i32 signext %a, i32 signext %b, <vscale x 8 x i32> %c, <vscale x 8 x i32> %d) {
+; CHECK-LABEL: selectcc_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB47_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB47_2:
+; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
+; CHECK-NEXT: vand.vx v28, v8, a2
+; CHECK-NEXT: vmv.v.x v8, a2
+; CHECK-NEXT: vxor.vi v8, v8, -1
+; CHECK-NEXT: vand.vv v8, v12, v8
+; CHECK-NEXT: vor.vv v8, v28, v8
+; CHECK-NEXT: ret
+ %cmp = icmp ne i32 %a, %b
+ %v = select i1 %cmp, <vscale x 8 x i32> %c, <vscale x 8 x i32> %d
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 16 x i32> @select_nxv16i32(i1 zeroext %c, <vscale x 16 x i32> %a, <vscale x 16 x i32> %b) {
+; CHECK-LABEL: select_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: bnez a0, .LBB48_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, zero
+; CHECK-NEXT: .LBB48_2:
+; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: vmv.v.x v24, a1
+; CHECK-NEXT: vxor.vi v24, v24, -1
+; CHECK-NEXT: vand.vv v16, v16, v24
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 16 x i32> %a, <vscale x 16 x i32> %b
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @selectcc_nxv16i32(i32 signext %a, i32 signext %b, <vscale x 16 x i32> %c, <vscale x 16 x i32> %d) {
+; CHECK-LABEL: selectcc_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, -1
+; CHECK-NEXT: bne a0, a1, .LBB49_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: .LBB49_2:
+; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu
+; CHECK-NEXT: vand.vx v8, v8, a2
+; CHECK-NEXT: vmv.v.x v24, a2
+; CHECK-NEXT: vxor.vi v24, v24, -1
+; CHECK-NEXT: vand.vv v16, v16, v24
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %cmp = icmp ne i32 %a, %b
+ %v = select i1 %cmp, <vscale x 16 x i32> %c, <vscale x 16 x i32> %d
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 1 x i64> @select_nxv1i64(i1 zeroext %c, <vscale x 1 x i64> %a, <vscale x 1 x i64> %b) {
+; RV32-LABEL: select_nxv1i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: addi a1, zero, -1
+; RV32-NEXT: bnez a0, .LBB50_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: .LBB50_2:
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
+; RV32-NEXT: vand.vv v26, v8, v25
+; RV32-NEXT: vxor.vi v25, v25, -1
+; RV32-NEXT: vand.vv v25, v9, v25
+; RV32-NEXT: vor.vv v8, v26, v25
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: select_nxv1i64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: bnez a0, .LBB50_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, zero
+; RV64-NEXT: .LBB50_2:
+; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV64-NEXT: vand.vx v25, v8, a1
+; RV64-NEXT: vmv.v.x v26, a1
+; RV64-NEXT: vxor.vi v26, v26, -1
+; RV64-NEXT: vand.vv v26, v9, v26
+; RV64-NEXT: vor.vv v8, v25, v26
+; RV64-NEXT: ret
+ %v = select i1 %c, <vscale x 1 x i64> %a, <vscale x 1 x i64> %b
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @selectcc_nxv1i64(i64 signext %a, i64 signext %b, <vscale x 1 x i64> %c, <vscale x 1 x i64> %d) {
+; RV32-LABEL: selectcc_nxv1i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: xor a1, a1, a3
+; RV32-NEXT: xor a0, a0, a2
+; RV32-NEXT: or a1, a0, a1
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: bnez a1, .LBB51_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, zero
+; RV32-NEXT: .LBB51_2:
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
+; RV32-NEXT: vand.vv v26, v8, v25
+; RV32-NEXT: vxor.vi v25, v25, -1
+; RV32-NEXT: vand.vv v25, v9, v25
+; RV32-NEXT: vor.vv v8, v26, v25
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: selectcc_nxv1i64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a2, zero, -1
+; RV64-NEXT: bne a0, a1, .LBB51_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a2, zero
+; RV64-NEXT: .LBB51_2:
+; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV64-NEXT: vand.vx v25, v8, a2
+; RV64-NEXT: vmv.v.x v26, a2
+; RV64-NEXT: vxor.vi v26, v26, -1
+; RV64-NEXT: vand.vv v26, v9, v26
+; RV64-NEXT: vor.vv v8, v25, v26
+; RV64-NEXT: ret
+ %cmp = icmp ne i64 %a, %b
+ %v = select i1 %cmp, <vscale x 1 x i64> %c, <vscale x 1 x i64> %d
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 2 x i64> @select_nxv2i64(i1 zeroext %c, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; RV32-LABEL: select_nxv2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: addi a1, zero, -1
+; RV32-NEXT: bnez a0, .LBB52_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: .LBB52_2:
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vand.vv v28, v8, v26
+; RV32-NEXT: vxor.vi v26, v26, -1
+; RV32-NEXT: vand.vv v26, v10, v26
+; RV32-NEXT: vor.vv v8, v28, v26
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: select_nxv2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: bnez a0, .LBB52_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, zero
+; RV64-NEXT: .LBB52_2:
+; RV64-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; RV64-NEXT: vand.vx v26, v8, a1
+; RV64-NEXT: vmv.v.x v28, a1
+; RV64-NEXT: vxor.vi v28, v28, -1
+; RV64-NEXT: vand.vv v28, v10, v28
+; RV64-NEXT: vor.vv v8, v26, v28
+; RV64-NEXT: ret
+ %v = select i1 %c, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @selectcc_nxv2i64(i64 signext %a, i64 signext %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d) {
+; RV32-LABEL: selectcc_nxv2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: xor a1, a1, a3
+; RV32-NEXT: xor a0, a0, a2
+; RV32-NEXT: or a1, a0, a1
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: bnez a1, .LBB53_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, zero
+; RV32-NEXT: .LBB53_2:
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vand.vv v28, v8, v26
+; RV32-NEXT: vxor.vi v26, v26, -1
+; RV32-NEXT: vand.vv v26, v10, v26
+; RV32-NEXT: vor.vv v8, v28, v26
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: selectcc_nxv2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a2, zero, -1
+; RV64-NEXT: bne a0, a1, .LBB53_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a2, zero
+; RV64-NEXT: .LBB53_2:
+; RV64-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; RV64-NEXT: vand.vx v26, v8, a2
+; RV64-NEXT: vmv.v.x v28, a2
+; RV64-NEXT: vxor.vi v28, v28, -1
+; RV64-NEXT: vand.vv v28, v10, v28
+; RV64-NEXT: vor.vv v8, v26, v28
+; RV64-NEXT: ret
+ %cmp = icmp ne i64 %a, %b
+ %v = select i1 %cmp, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 4 x i64> @select_nxv4i64(i1 zeroext %c, <vscale x 4 x i64> %a, <vscale x 4 x i64> %b) {
+; RV32-LABEL: select_nxv4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: addi a1, zero, -1
+; RV32-NEXT: bnez a0, .LBB54_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: .LBB54_2:
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v28, (a0), zero
+; RV32-NEXT: vand.vv v8, v8, v28
+; RV32-NEXT: vxor.vi v28, v28, -1
+; RV32-NEXT: vand.vv v28, v12, v28
+; RV32-NEXT: vor.vv v8, v8, v28
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: select_nxv4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: bnez a0, .LBB54_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, zero
+; RV64-NEXT: .LBB54_2:
+; RV64-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; RV64-NEXT: vand.vx v28, v8, a1
+; RV64-NEXT: vmv.v.x v8, a1
+; RV64-NEXT: vxor.vi v8, v8, -1
+; RV64-NEXT: vand.vv v8, v12, v8
+; RV64-NEXT: vor.vv v8, v28, v8
+; RV64-NEXT: ret
+ %v = select i1 %c, <vscale x 4 x i64> %a, <vscale x 4 x i64> %b
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @selectcc_nxv4i64(i64 signext %a, i64 signext %b, <vscale x 4 x i64> %c, <vscale x 4 x i64> %d) {
+; RV32-LABEL: selectcc_nxv4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: xor a1, a1, a3
+; RV32-NEXT: xor a0, a0, a2
+; RV32-NEXT: or a1, a0, a1
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: bnez a1, .LBB55_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, zero
+; RV32-NEXT: .LBB55_2:
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v28, (a0), zero
+; RV32-NEXT: vand.vv v8, v8, v28
+; RV32-NEXT: vxor.vi v28, v28, -1
+; RV32-NEXT: vand.vv v28, v12, v28
+; RV32-NEXT: vor.vv v8, v8, v28
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: selectcc_nxv4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a2, zero, -1
+; RV64-NEXT: bne a0, a1, .LBB55_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a2, zero
+; RV64-NEXT: .LBB55_2:
+; RV64-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; RV64-NEXT: vand.vx v28, v8, a2
+; RV64-NEXT: vmv.v.x v8, a2
+; RV64-NEXT: vxor.vi v8, v8, -1
+; RV64-NEXT: vand.vv v8, v12, v8
+; RV64-NEXT: vor.vv v8, v28, v8
+; RV64-NEXT: ret
+ %cmp = icmp ne i64 %a, %b
+ %v = select i1 %cmp, <vscale x 4 x i64> %c, <vscale x 4 x i64> %d
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 8 x i64> @select_nxv8i64(i1 zeroext %c, <vscale x 8 x i64> %a, <vscale x 8 x i64> %b) {
+; RV32-LABEL: select_nxv8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: addi a1, zero, -1
+; RV32-NEXT: bnez a0, .LBB56_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: .LBB56_2:
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v24, (a0), zero
+; RV32-NEXT: vand.vv v8, v8, v24
+; RV32-NEXT: vxor.vi v24, v24, -1
+; RV32-NEXT: vand.vv v16, v16, v24
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: select_nxv8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: bnez a0, .LBB56_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, zero
+; RV64-NEXT: .LBB56_2:
+; RV64-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; RV64-NEXT: vand.vx v8, v8, a1
+; RV64-NEXT: vmv.v.x v24, a1
+; RV64-NEXT: vxor.vi v24, v24, -1
+; RV64-NEXT: vand.vv v16, v16, v24
+; RV64-NEXT: vor.vv v8, v8, v16
+; RV64-NEXT: ret
+ %v = select i1 %c, <vscale x 8 x i64> %a, <vscale x 8 x i64> %b
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @selectcc_nxv8i64(i64 signext %a, i64 signext %b, <vscale x 8 x i64> %c, <vscale x 8 x i64> %d) {
+; RV32-LABEL: selectcc_nxv8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: xor a1, a1, a3
+; RV32-NEXT: xor a0, a0, a2
+; RV32-NEXT: or a1, a0, a1
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: bnez a1, .LBB57_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, zero
+; RV32-NEXT: .LBB57_2:
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v24, (a0), zero
+; RV32-NEXT: vand.vv v8, v8, v24
+; RV32-NEXT: vxor.vi v24, v24, -1
+; RV32-NEXT: vand.vv v16, v16, v24
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: selectcc_nxv8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a2, zero, -1
+; RV64-NEXT: bne a0, a1, .LBB57_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a2, zero
+; RV64-NEXT: .LBB57_2:
+; RV64-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; RV64-NEXT: vand.vx v8, v8, a2
+; RV64-NEXT: vmv.v.x v24, a2
+; RV64-NEXT: vxor.vi v24, v24, -1
+; RV64-NEXT: vand.vv v16, v16, v24
+; RV64-NEXT: vor.vv v8, v8, v16
+; RV64-NEXT: ret
+ %cmp = icmp ne i64 %a, %b
+ %v = select i1 %cmp, <vscale x 8 x i64> %c, <vscale x 8 x i64> %d
+ ret <vscale x 8 x i64> %v
+}