[llvm] d5ab38f - [RISCV] Support select/merge like ops for bf16 vectors when have Zvfbfmin (#91936)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 5 19:33:58 PDT 2024
Author: Jianjian Guan
Date: 2024-06-06T10:33:54+08:00
New Revision: d5ab38f69c1a5c1456bc1a8cc936489d31599f33
URL: https://github.com/llvm/llvm-project/commit/d5ab38f69c1a5c1456bc1a8cc936489d31599f33
DIFF: https://github.com/llvm/llvm-project/commit/d5ab38f69c1a5c1456bc1a8cc936489d31599f33.diff
LOG: [RISCV] Support select/merge like ops for bf16 vectors when have Zvfbfmin (#91936)
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
llvm/test/CodeGen/RISCV/rvv/select-fp.ll
llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8ace5d79af079..4051279fdbf8e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1102,6 +1102,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::EXTRACT_SUBVECTOR},
VT, Custom);
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+ if (Subtarget.hasStdExtZfbfmin()) {
+ if (Subtarget.hasVInstructionsF16())
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
+ else if (Subtarget.hasVInstructionsF16Minimal())
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ }
+ setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
+ Custom);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
// TODO: Promote to fp32.
}
}
@@ -1331,6 +1340,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::EXTRACT_SUBVECTOR},
VT, Custom);
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+ if (Subtarget.hasStdExtZfbfmin()) {
+ if (Subtarget.hasVInstructionsF16())
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
+ else if (Subtarget.hasVInstructionsF16Minimal())
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ }
+ setOperationAction(
+ {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
+ Custom);
// TODO: Promote to fp32.
continue;
}
@@ -6704,10 +6722,16 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::BUILD_VECTOR:
return lowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::SPLAT_VECTOR:
- if (Op.getValueType().getScalarType() == MVT::f16 &&
- (Subtarget.hasVInstructionsF16Minimal() &&
- !Subtarget.hasVInstructionsF16())) {
- if (Op.getValueType() == MVT::nxv32f16)
+ if ((Op.getValueType().getScalarType() == MVT::f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ Subtarget.hasStdExtZfhminOrZhinxmin() &&
+ !Subtarget.hasVInstructionsF16())) ||
+ (Op.getValueType().getScalarType() == MVT::bf16 &&
+ (Subtarget.hasVInstructionsBF16() && Subtarget.hasStdExtZfbfmin() &&
+ Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))) {
+ if (Op.getValueType() == MVT::nxv32f16 ||
+ Op.getValueType() == MVT::nxv32bf16)
return SplitVectorOp(Op, DAG);
SDLoc DL(Op);
SDValue NewScalar =
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index b0949f5fc1d72..fe4d839e4fdcb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -382,7 +382,20 @@ class GetIntVTypeInfo<VTypeInfo vti> {
// Equivalent integer vector type. Eg.
// VI8M1 → VI8M1 (identity)
// VF64M4 → VI64M4
- VTypeInfo Vti = !cast<VTypeInfo>(!subst("VF", "VI", !cast<string>(vti)));
+ VTypeInfo Vti = !cast<VTypeInfo>(!subst("VBF", "VI",
+ !subst("VF", "VI",
+ !cast<string>(vti))));
+}
+
+// This functor is used to obtain the fp vector type that has the same SEW and
+// multiplier as the input parameter type.
+class GetFpVTypeInfo<VTypeInfo vti> {
+ // Equivalent integer vector type. Eg.
+ // VF16M1 → VF16M1 (identity)
+ // VBF16M1 → VF16M1
+ VTypeInfo Vti = !cast<VTypeInfo>(!subst("VBF", "VF",
+ !subst("VI", "VF",
+ !cast<string>(vti))));
}
class MTypeInfo<ValueType Mas, LMULInfo M, string Bx> {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 3163e4bafd4b0..497c4aadf7535 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -1394,7 +1394,7 @@ defm : VPatFPSetCCSDNode_VV_VF_FV<SETOLE, "PseudoVMFLE", "PseudoVMFGE">;
// Floating-point vselects:
// 11.15. Vector Integer Merge Instructions
// 13.15. Vector Floating-Point Merge Instruction
-foreach fvti = AllFloatVectors in {
+foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
let Predicates = GetVTypePredicates<ivti>.Predicates in {
def : Pat<(fvti.Vector (vselect (fvti.Mask V0), fvti.RegClass:$rs1,
@@ -1412,7 +1412,9 @@ foreach fvti = AllFloatVectors in {
fvti.RegClass:$rs2, 0, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>;
}
- let Predicates = GetVTypePredicates<fvti>.Predicates in
+
+ let Predicates = !listconcat(GetVTypePredicates<GetFpVTypeInfo<fvti>.Vti>.Predicates,
+ GetVTypeScalarPredicates<fvti>.Predicates) in
def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
(SplatFPOp fvti.ScalarRegClass:$rs1),
fvti.RegClass:$rs2)),
@@ -1475,7 +1477,7 @@ foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
//===----------------------------------------------------------------------===//
foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
- let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ let Predicates = !listconcat(GetVTypePredicates<GetFpVTypeInfo<fvti>.Vti>.Predicates,
GetVTypeScalarPredicates<fvti>.Predicates) in
def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl undef, fvti.ScalarRegClass:$rs1, srcvalue)),
(!cast<Instruction>("PseudoVFMV_V_"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index ce8133a5a297b..70d8265e7be46 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2604,7 +2604,7 @@ foreach vti = AllFloatVectors in {
}
}
-foreach fvti = AllFloatVectors in {
+foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
// Floating-point vselects:
// 11.15. Vector Integer Merge Instructions
// 13.15. Vector Floating-Point Merge Instruction
@@ -2639,7 +2639,8 @@ foreach fvti = AllFloatVectors in {
GPR:$vl, fvti.Log2SEW)>;
}
- let Predicates = GetVTypePredicates<fvti>.Predicates in {
+ let Predicates = !listconcat(GetVTypePredicates<GetFpVTypeInfo<fvti>.Vti>.Predicates,
+ GetVTypeScalarPredicates<fvti>.Predicates) in {
def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0),
(SplatFPOp fvti.ScalarRegClass:$rs1),
fvti.RegClass:$rs2,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
index d945cf5616981..7a96aad31f084 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d -riscv-v-vector-bits-min=128 \
; RUN: -verify-machineinstrs < %s | FileCheck %s
define <2 x half> @select_v2f16(i1 zeroext %c, <2 x half> %a, <2 x half> %b) {
@@ -343,3 +343,123 @@ define <16 x double> @selectcc_v16f64(double %a, double %b, <16 x double> %c, <1
%v = select i1 %cmp, <16 x double> %c, <16 x double> %d
ret <16 x double> %v
}
+
+define <2 x bfloat> @select_v2bf16(i1 zeroext %c, <2 x bfloat> %a, <2 x bfloat> %b) {
+; CHECK-LABEL: select_v2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: ret
+ %v = select i1 %c, <2 x bfloat> %a, <2 x bfloat> %b
+ ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @selectcc_v2bf16(bfloat %a, bfloat %b, <2 x bfloat> %c, <2 x bfloat> %d) {
+; CHECK-LABEL: selectcc_v2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT: feq.s a0, fa4, fa5
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq bfloat %a, %b
+ %v = select i1 %cmp, <2 x bfloat> %c, <2 x bfloat> %d
+ ret <2 x bfloat> %v
+}
+
+define <4 x bfloat> @select_v4bf16(i1 zeroext %c, <4 x bfloat> %a, <4 x bfloat> %b) {
+; CHECK-LABEL: select_v4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: ret
+ %v = select i1 %c, <4 x bfloat> %a, <4 x bfloat> %b
+ ret <4 x bfloat> %v
+}
+
+define <4 x bfloat> @selectcc_v4bf16(bfloat %a, bfloat %b, <4 x bfloat> %c, <4 x bfloat> %d) {
+; CHECK-LABEL: selectcc_v4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT: feq.s a0, fa4, fa5
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq bfloat %a, %b
+ %v = select i1 %cmp, <4 x bfloat> %c, <4 x bfloat> %d
+ ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @select_v8bf16(i1 zeroext %c, <8 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: select_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: ret
+ %v = select i1 %c, <8 x bfloat> %a, <8 x bfloat> %b
+ ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @selectcc_v8bf16(bfloat %a, bfloat %b, <8 x bfloat> %c, <8 x bfloat> %d) {
+; CHECK-LABEL: selectcc_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT: feq.s a0, fa4, fa5
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq bfloat %a, %b
+ %v = select i1 %cmp, <8 x bfloat> %c, <8 x bfloat> %d
+ ret <8 x bfloat> %v
+}
+
+define <16 x bfloat> @select_v16bf16(i1 zeroext %c, <16 x bfloat> %a, <16 x bfloat> %b) {
+; CHECK-LABEL: select_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vmsne.vi v0, v12, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: ret
+ %v = select i1 %c, <16 x bfloat> %a, <16 x bfloat> %b
+ ret <16 x bfloat> %v
+}
+
+define <16 x bfloat> @selectcc_v16bf16(bfloat %a, bfloat %b, <16 x bfloat> %c, <16 x bfloat> %d) {
+; CHECK-LABEL: selectcc_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT: feq.s a0, fa4, fa5
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vmsne.vi v0, v12, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq bfloat %a, %b
+ %v = select i1 %cmp, <16 x bfloat> %c, <16 x bfloat> %d
+ ret <16 x bfloat> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
index 9f0561b394b81..d360c3f635b5c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN
declare <4 x i1> @llvm.vp.merge.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32)
@@ -1240,3 +1240,139 @@ define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1>
%v = call <32 x double> @llvm.vp.merge.v32f64(<32 x i1> %m, <32 x double> %va, <32 x double> %vb, i32 %evl)
ret <32 x double> %v
}
+
+declare <2 x bfloat> @llvm.vp.merge.v2bf16(<2 x i1>, <2 x bfloat>, <2 x bfloat>, i32)
+
+define <2 x bfloat> @vpmerge_vv_v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
+; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.merge.v2bf16(<2 x i1> %m, <2 x bfloat> %va, <2 x bfloat> %vb, i32 %evl)
+ ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vpmerge_vf_v2bf16(bfloat %a, <2 x bfloat> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpmerge_vf_v2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
+; ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpmerge_vf_v2bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT: ret
+ %elt.head = insertelement <2 x bfloat> poison, bfloat %a, i32 0
+ %va = shufflevector <2 x bfloat> %elt.head, <2 x bfloat> poison, <2 x i32> zeroinitializer
+ %v = call <2 x bfloat> @llvm.vp.merge.v2bf16(<2 x i1> %m, <2 x bfloat> %va, <2 x bfloat> %vb, i32 %evl)
+ ret <2 x bfloat> %v
+}
+
+declare <4 x bfloat> @llvm.vp.merge.v4bf16(<4 x i1>, <4 x bfloat>, <4 x bfloat>, i32)
+
+define <4 x bfloat> @vpmerge_vv_v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
+; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <4 x bfloat> @llvm.vp.merge.v4bf16(<4 x i1> %m, <4 x bfloat> %va, <4 x bfloat> %vb, i32 %evl)
+ ret <4 x bfloat> %v
+}
+
+define <4 x bfloat> @vpmerge_vf_v4bf16(bfloat %a, <4 x bfloat> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpmerge_vf_v4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
+; ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpmerge_vf_v4bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, tu, mu
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT: ret
+ %elt.head = insertelement <4 x bfloat> poison, bfloat %a, i32 0
+ %va = shufflevector <4 x bfloat> %elt.head, <4 x bfloat> poison, <4 x i32> zeroinitializer
+ %v = call <4 x bfloat> @llvm.vp.merge.v4bf16(<4 x i1> %m, <4 x bfloat> %va, <4 x bfloat> %vb, i32 %evl)
+ ret <4 x bfloat> %v
+}
+
+declare <8 x bfloat> @llvm.vp.merge.v8bf16(<8 x i1>, <8 x bfloat>, <8 x bfloat>, i32)
+
+define <8 x bfloat> @vpmerge_vv_v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <8 x bfloat> @llvm.vp.merge.v8bf16(<8 x i1> %m, <8 x bfloat> %va, <8 x bfloat> %vb, i32 %evl)
+ ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @vpmerge_vf_v8bf16(bfloat %a, <8 x bfloat> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpmerge_vf_v8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpmerge_vf_v8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, tu, mu
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT: ret
+ %elt.head = insertelement <8 x bfloat> poison, bfloat %a, i32 0
+ %va = shufflevector <8 x bfloat> %elt.head, <8 x bfloat> poison, <8 x i32> zeroinitializer
+ %v = call <8 x bfloat> @llvm.vp.merge.v8bf16(<8 x i1> %m, <8 x bfloat> %va, <8 x bfloat> %vb, i32 %evl)
+ ret <8 x bfloat> %v
+}
+
+declare <16 x bfloat> @llvm.vp.merge.v16bf16(<16 x i1>, <16 x bfloat>, <16 x bfloat>, i32)
+
+define <16 x bfloat> @vpmerge_vv_v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma
+; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0
+; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: ret
+ %v = call <16 x bfloat> @llvm.vp.merge.v16bf16(<16 x i1> %m, <16 x bfloat> %va, <16 x bfloat> %vb, i32 %evl)
+ ret <16 x bfloat> %v
+}
+
+define <16 x bfloat> @vpmerge_vf_v16bf16(bfloat %a, <16 x bfloat> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpmerge_vf_v16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, tu, ma
+; ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpmerge_vf_v16bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, tu, mu
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT: ret
+ %elt.head = insertelement <16 x bfloat> poison, bfloat %a, i32 0
+ %va = shufflevector <16 x bfloat> %elt.head, <16 x bfloat> poison, <16 x i32> zeroinitializer
+ %v = call <16 x bfloat> @llvm.vp.merge.v16bf16(<16 x i1> %m, <16 x bfloat> %va, <16 x bfloat> %vb, i32 %evl)
+ ret <16 x bfloat> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
index 0a2ed3eb1ffbf..c5d9cdacae749 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d -riscv-v-vector-bits-min=128 \
; RUN: -verify-machineinstrs < %s | FileCheck %s
declare <1 x i1> @llvm.vp.select.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32)
@@ -683,3 +683,51 @@ define <16 x double> @select_v16f64(<16 x i1> %a, <16 x double> %b, <16 x double
%v = call <16 x double> @llvm.vp.select.v16f64(<16 x i1> %a, <16 x double> %b, <16 x double> %c, i32 %evl)
ret <16 x double> %v
}
+
+declare <2 x bfloat> @llvm.vp.select.v2bf16(<2 x i1>, <2 x bfloat>, <2 x bfloat>, i32)
+
+define <2 x bfloat> @select_v2bf16(<2 x i1> %a, <2 x bfloat> %b, <2 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_v2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.select.v2bf16(<2 x i1> %a, <2 x bfloat> %b, <2 x bfloat> %c, i32 %evl)
+ ret <2 x bfloat> %v
+}
+
+declare <4 x bfloat> @llvm.vp.select.v4bf16(<4 x i1>, <4 x bfloat>, <4 x bfloat>, i32)
+
+define <4 x bfloat> @select_v4bf16(<4 x i1> %a, <4 x bfloat> %b, <4 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_v4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: ret
+ %v = call <4 x bfloat> @llvm.vp.select.v4bf16(<4 x i1> %a, <4 x bfloat> %b, <4 x bfloat> %c, i32 %evl)
+ ret <4 x bfloat> %v
+}
+
+declare <8 x bfloat> @llvm.vp.select.v8bf16(<8 x i1>, <8 x bfloat>, <8 x bfloat>, i32)
+
+define <8 x bfloat> @select_v8bf16(<8 x i1> %a, <8 x bfloat> %b, <8 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: ret
+ %v = call <8 x bfloat> @llvm.vp.select.v8bf16(<8 x i1> %a, <8 x bfloat> %b, <8 x bfloat> %c, i32 %evl)
+ ret <8 x bfloat> %v
+}
+
+declare <16 x bfloat> @llvm.vp.select.v16bf16(<16 x i1>, <16 x bfloat>, <16 x bfloat>, i32)
+
+define <16 x bfloat> @select_v16bf16(<16 x i1> %a, <16 x bfloat> %b, <16 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: ret
+ %v = call <16 x bfloat> @llvm.vp.select.v16bf16(<16 x i1> %a, <16 x bfloat> %b, <16 x bfloat> %c, i32 %evl)
+ ret <16 x bfloat> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/select-fp.ll
index f8581d8e21b39..2b9d847a9e873 100644
--- a/llvm/test/CodeGen/RISCV/rvv/select-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/select-fp.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
define <vscale x 1 x half> @select_nxv1f16(i1 zeroext %c, <vscale x 1 x half> %a, <vscale x 1 x half> %b) {
@@ -427,3 +427,183 @@ define <vscale x 8 x double> @selectcc_nxv8f64(double %a, double %b, <vscale x 8
%v = select i1 %cmp, <vscale x 8 x double> %c, <vscale x 8 x double> %d
ret <vscale x 8 x double> %v
}
+
+define <vscale x 1 x bfloat> @select_nxv1bf16(i1 zeroext %c, <vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) {
+; CHECK-LABEL: select_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @selectcc_nxv1bf16(bfloat %a, bfloat %b, <vscale x 1 x bfloat> %c, <vscale x 1 x bfloat> %d) {
+; CHECK-LABEL: selectcc_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT: feq.s a0, fa4, fa5
+; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq bfloat %a, %b
+ %v = select i1 %cmp, <vscale x 1 x bfloat> %c, <vscale x 1 x bfloat> %d
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @select_nxv2bf16(i1 zeroext %c, <vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
+; CHECK-LABEL: select_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @selectcc_nxv2bf16(bfloat %a, bfloat %b, <vscale x 2 x bfloat> %c, <vscale x 2 x bfloat> %d) {
+; CHECK-LABEL: selectcc_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT: feq.s a0, fa4, fa5
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq bfloat %a, %b
+ %v = select i1 %cmp, <vscale x 2 x bfloat> %c, <vscale x 2 x bfloat> %d
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @select_nxv4bf16(i1 zeroext %c, <vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
+; CHECK-LABEL: select_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @selectcc_nxv4bf16(bfloat %a, bfloat %b, <vscale x 4 x bfloat> %c, <vscale x 4 x bfloat> %d) {
+; CHECK-LABEL: selectcc_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT: feq.s a0, fa4, fa5
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq bfloat %a, %b
+ %v = select i1 %cmp, <vscale x 4 x bfloat> %c, <vscale x 4 x bfloat> %d
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @select_nxv8bf16(i1 zeroext %c, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
+; CHECK-LABEL: select_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vmsne.vi v0, v12, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @selectcc_nxv8bf16(bfloat %a, bfloat %b, <vscale x 8 x bfloat> %c, <vscale x 8 x bfloat> %d) {
+; CHECK-LABEL: selectcc_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT: feq.s a0, fa4, fa5
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vmsne.vi v0, v12, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq bfloat %a, %b
+ %v = select i1 %cmp, <vscale x 8 x bfloat> %c, <vscale x 8 x bfloat> %d
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @select_nxv16bf16(i1 zeroext %c, <vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) {
+; CHECK-LABEL: select_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: vmsne.vi v0, v16, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @selectcc_nxv16bf16(bfloat %a, bfloat %b, <vscale x 16 x bfloat> %c, <vscale x 16 x bfloat> %d) {
+; CHECK-LABEL: selectcc_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT: feq.s a0, fa4, fa5
+; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: vmsne.vi v0, v16, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq bfloat %a, %b
+ %v = select i1 %cmp, <vscale x 16 x bfloat> %c, <vscale x 16 x bfloat> %d
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @select_nxv32bf16(i1 zeroext %c, <vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) {
+; CHECK-LABEL: select_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v24, a0
+; CHECK-NEXT: vmsne.vi v0, v24, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT: ret
+ %v = select i1 %c, <vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @selectcc_nxv32bf16(bfloat %a, bfloat %b, <vscale x 32 x bfloat> %c, <vscale x 32 x bfloat> %d) {
+; CHECK-LABEL: selectcc_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT: feq.s a0, fa4, fa5
+; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v24, a0
+; CHECK-NEXT: vmsne.vi v0, v24, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq bfloat %a, %b
+ %v = select i1 %cmp, <vscale x 32 x bfloat> %c, <vscale x 32 x bfloat> %d
+ ret <vscale x 32 x bfloat> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
index 094e6c9cc754f..e33c795169fab 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFHMIN
declare <vscale x 1 x i1> @llvm.vp.merge.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>, <vscale x 1 x i1>, i32)
@@ -1547,3 +1547,303 @@ define <vscale x 8 x double> @vpmerge_vf_nxv8f64(double %a, <vscale x 8 x double
%v = call <vscale x 8 x double> @llvm.vp.merge.nxv8f64(<vscale x 8 x i1> %m, <vscale x 8 x double> %va, <vscale x 8 x double> %vb, i32 %evl)
ret <vscale x 8 x double> %v
}
+
+declare <vscale x 1 x bfloat> @llvm.vp.merge.nxv1bf16(<vscale x 1 x i1>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, i32)
+
+define <vscale x 1 x bfloat> @vpmerge_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
+; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.merge.nxv1bf16(<vscale x 1 x i1> %m, <vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vpmerge_vf_nxv1bf16(bfloat %a, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32ZVFH-LABEL: vpmerge_vf_nxv1bf16:
+; RV32ZVFH: # %bb.0:
+; RV32ZVFH-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
+; RV32ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; RV32ZVFH-NEXT: ret
+;
+; RV64ZVFH-LABEL: vpmerge_vf_nxv1bf16:
+; RV64ZVFH: # %bb.0:
+; RV64ZVFH-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
+; RV64ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; RV64ZVFH-NEXT: ret
+;
+; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv1bf16:
+; RV32ZVFHMIN: # %bb.0:
+; RV32ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; RV32ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; RV32ZVFHMIN-NEXT: vfmv.v.f v9, fa5
+; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
+; RV32ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
+; RV32ZVFHMIN-NEXT: ret
+;
+; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv1bf16:
+; RV64ZVFHMIN: # %bb.0:
+; RV64ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; RV64ZVFHMIN-NEXT: vfmv.v.f v9, fa5
+; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
+; RV64ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
+; RV64ZVFHMIN-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %a, i32 0
+ %va = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x bfloat> @llvm.vp.merge.nxv1bf16(<vscale x 1 x i1> %m, <vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.merge.nxv2bf16(<vscale x 2 x i1>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, i32)
+
+define <vscale x 2 x bfloat> @vpmerge_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
+; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.merge.nxv2bf16(<vscale x 2 x i1> %m, <vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vpmerge_vf_nxv2bf16(bfloat %a, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; RV32ZVFH-LABEL: vpmerge_vf_nxv2bf16:
+; RV32ZVFH: # %bb.0:
+; RV32ZVFH-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
+; RV32ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; RV32ZVFH-NEXT: ret
+;
+; RV64ZVFH-LABEL: vpmerge_vf_nxv2bf16:
+; RV64ZVFH: # %bb.0:
+; RV64ZVFH-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
+; RV64ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; RV64ZVFH-NEXT: ret
+;
+; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv2bf16:
+; RV32ZVFHMIN: # %bb.0:
+; RV32ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; RV32ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV32ZVFHMIN-NEXT: vfmv.v.f v9, fa5
+; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, tu, mu
+; RV32ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
+; RV32ZVFHMIN-NEXT: ret
+;
+; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv2bf16:
+; RV64ZVFHMIN: # %bb.0:
+; RV64ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV64ZVFHMIN-NEXT: vfmv.v.f v9, fa5
+; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, tu, mu
+; RV64ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
+; RV64ZVFHMIN-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %a, i32 0
+ %va = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x bfloat> @llvm.vp.merge.nxv2bf16(<vscale x 2 x i1> %m, <vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.merge.nxv4bf16(<vscale x 4 x i1>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, i32)
+
+define <vscale x 4 x bfloat> @vpmerge_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.merge.nxv4bf16(<vscale x 4 x i1> %m, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vpmerge_vf_nxv4bf16(bfloat %a, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; RV32ZVFH-LABEL: vpmerge_vf_nxv4bf16:
+; RV32ZVFH: # %bb.0:
+; RV32ZVFH-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; RV32ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; RV32ZVFH-NEXT: ret
+;
+; RV64ZVFH-LABEL: vpmerge_vf_nxv4bf16:
+; RV64ZVFH: # %bb.0:
+; RV64ZVFH-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; RV64ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; RV64ZVFH-NEXT: ret
+;
+; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv4bf16:
+; RV32ZVFHMIN: # %bb.0:
+; RV32ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; RV32ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV32ZVFHMIN-NEXT: vfmv.v.f v10, fa5
+; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, tu, mu
+; RV32ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
+; RV32ZVFHMIN-NEXT: ret
+;
+; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv4bf16:
+; RV64ZVFHMIN: # %bb.0:
+; RV64ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV64ZVFHMIN-NEXT: vfmv.v.f v10, fa5
+; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, tu, mu
+; RV64ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
+; RV64ZVFHMIN-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %a, i32 0
+ %va = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x bfloat> @llvm.vp.merge.nxv4bf16(<vscale x 4 x i1> %m, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.merge.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
+
+define <vscale x 8 x bfloat> @vpmerge_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma
+; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0
+; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.merge.nxv8bf16(<vscale x 8 x i1> %m, <vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vpmerge_vf_nxv8bf16(bfloat %a, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32ZVFH-LABEL: vpmerge_vf_nxv8bf16:
+; RV32ZVFH: # %bb.0:
+; RV32ZVFH-NEXT: vsetvli zero, a0, e16, m2, tu, ma
+; RV32ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; RV32ZVFH-NEXT: ret
+;
+; RV64ZVFH-LABEL: vpmerge_vf_nxv8bf16:
+; RV64ZVFH: # %bb.0:
+; RV64ZVFH-NEXT: vsetvli zero, a0, e16, m2, tu, ma
+; RV64ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; RV64ZVFH-NEXT: ret
+;
+; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv8bf16:
+; RV32ZVFHMIN: # %bb.0:
+; RV32ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; RV32ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; RV32ZVFHMIN-NEXT: vfmv.v.f v12, fa5
+; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, tu, mu
+; RV32ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
+; RV32ZVFHMIN-NEXT: ret
+;
+; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv8bf16:
+; RV64ZVFHMIN: # %bb.0:
+; RV64ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; RV64ZVFHMIN-NEXT: vfmv.v.f v12, fa5
+; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, tu, mu
+; RV64ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
+; RV64ZVFHMIN-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %a, i32 0
+ %va = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x bfloat> @llvm.vp.merge.nxv8bf16(<vscale x 8 x i1> %m, <vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.vp.merge.nxv16bf16(<vscale x 16 x i1>, <vscale x 16 x bfloat>, <vscale x 16 x bfloat>, i32)
+
+define <vscale x 16 x bfloat> @vpmerge_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma
+; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0
+; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.merge.nxv16bf16(<vscale x 16 x i1> %m, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vpmerge_vf_nxv16bf16(bfloat %a, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; RV32ZVFH-LABEL: vpmerge_vf_nxv16bf16:
+; RV32ZVFH: # %bb.0:
+; RV32ZVFH-NEXT: vsetvli zero, a0, e16, m4, tu, ma
+; RV32ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; RV32ZVFH-NEXT: ret
+;
+; RV64ZVFH-LABEL: vpmerge_vf_nxv16bf16:
+; RV64ZVFH: # %bb.0:
+; RV64ZVFH-NEXT: vsetvli zero, a0, e16, m4, tu, ma
+; RV64ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; RV64ZVFH-NEXT: ret
+;
+; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv16bf16:
+; RV32ZVFHMIN: # %bb.0:
+; RV32ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; RV32ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; RV32ZVFHMIN-NEXT: vfmv.v.f v16, fa5
+; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, tu, mu
+; RV32ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t
+; RV32ZVFHMIN-NEXT: ret
+;
+; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv16bf16:
+; RV64ZVFHMIN: # %bb.0:
+; RV64ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; RV64ZVFHMIN-NEXT: vfmv.v.f v16, fa5
+; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, tu, mu
+; RV64ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t
+; RV64ZVFHMIN-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %a, i32 0
+ %va = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x bfloat> @llvm.vp.merge.nxv16bf16(<vscale x 16 x i1> %m, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.vp.merge.nxv32bf16(<vscale x 32 x i1>, <vscale x 32 x bfloat>, <vscale x 32 x bfloat>, i32)
+
+define <vscale x 32 x bfloat> @vpmerge_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma
+; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0
+; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.merge.nxv32bf16(<vscale x 32 x i1> %m, <vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vpmerge_vf_nxv32bf16(bfloat %a, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; RV32ZVFH-LABEL: vpmerge_vf_nxv32bf16:
+; RV32ZVFH: # %bb.0:
+; RV32ZVFH-NEXT: vsetvli zero, a0, e16, m8, tu, ma
+; RV32ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; RV32ZVFH-NEXT: ret
+;
+; RV64ZVFH-LABEL: vpmerge_vf_nxv32bf16:
+; RV64ZVFH: # %bb.0:
+; RV64ZVFH-NEXT: vsetvli zero, a0, e16, m8, tu, ma
+; RV64ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; RV64ZVFH-NEXT: ret
+;
+; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv32bf16:
+; RV32ZVFHMIN: # %bb.0:
+; RV32ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; RV32ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; RV32ZVFHMIN-NEXT: vfmv.v.f v24, fa5
+; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; RV32ZVFHMIN-NEXT: vfncvtbf16.f.f.w v16, v24
+; RV32ZVFHMIN-NEXT: vmv.v.v v20, v16
+; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, tu, ma
+; RV32ZVFHMIN-NEXT: vmerge.vvm v8, v8, v16, v0
+; RV32ZVFHMIN-NEXT: ret
+;
+; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv32bf16:
+; RV64ZVFHMIN: # %bb.0:
+; RV64ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; RV64ZVFHMIN-NEXT: vfmv.v.f v24, fa5
+; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; RV64ZVFHMIN-NEXT: vfncvtbf16.f.f.w v16, v24
+; RV64ZVFHMIN-NEXT: vmv.v.v v20, v16
+; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, tu, ma
+; RV64ZVFHMIN-NEXT: vmerge.vvm v8, v8, v16, v0
+; RV64ZVFHMIN-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %a, i32 0
+ %va = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x bfloat> @llvm.vp.merge.nxv32bf16(<vscale x 32 x i1> %m, <vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
index 53b8e4a78b756..82c2fe3273bdf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVFHMIN
define <vscale x 1 x half> @vfmerge_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %cond) {
@@ -512,3 +512,219 @@ define void @vselect_legalize_regression(<vscale x 16 x double> %a, <vscale x 16
store <vscale x 16 x double> %sel, ptr %out
ret void
}
+
+define <vscale x 1 x bfloat> @vfmerge_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %cond) {
+; CHECK-LABEL: vfmerge_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: ret
+ %vc = select <vscale x 1 x i1> %cond, <vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb
+ ret <vscale x 1 x bfloat> %vc
+}
+
+define <vscale x 1 x bfloat> @vfmerge_fv_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %cond) {
+; CHECK-ZVFH-LABEL: vfmerge_fv_nxv1bf16:
+; CHECK-ZVFH: # %bb.0:
+; CHECK-ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-ZVFH-NEXT: ret
+;
+; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv1bf16:
+; CHECK-ZVFHMIN: # %bb.0:
+; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-ZVFHMIN-NEXT: vfmv.v.f v9, fa5
+; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-ZVFHMIN-NEXT: ret
+ %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %vc = select <vscale x 1 x i1> %cond, <vscale x 1 x bfloat> %splat, <vscale x 1 x bfloat> %va
+ ret <vscale x 1 x bfloat> %vc
+}
+
+define <vscale x 2 x bfloat> @vfmerge_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %cond) {
+; CHECK-LABEL: vfmerge_vv_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: ret
+ %vc = select <vscale x 2 x i1> %cond, <vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb
+ ret <vscale x 2 x bfloat> %vc
+}
+
+define <vscale x 2 x bfloat> @vfmerge_fv_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b, <vscale x 2 x i1> %cond) {
+; CHECK-ZVFH-LABEL: vfmerge_fv_nxv2bf16:
+; CHECK-ZVFH: # %bb.0:
+; CHECK-ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-ZVFH-NEXT: ret
+;
+; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv2bf16:
+; CHECK-ZVFHMIN: # %bb.0:
+; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-ZVFHMIN-NEXT: vfmv.v.f v9, fa5
+; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-ZVFHMIN-NEXT: ret
+ %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %vc = select <vscale x 2 x i1> %cond, <vscale x 2 x bfloat> %splat, <vscale x 2 x bfloat> %va
+ ret <vscale x 2 x bfloat> %vc
+}
+
+define <vscale x 4 x bfloat> @vfmerge_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %cond) {
+; CHECK-LABEL: vfmerge_vv_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: ret
+ %vc = select <vscale x 4 x i1> %cond, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb
+ ret <vscale x 4 x bfloat> %vc
+}
+
+define <vscale x 4 x bfloat> @vfmerge_fv_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b, <vscale x 4 x i1> %cond) {
+; CHECK-ZVFH-LABEL: vfmerge_fv_nxv4bf16:
+; CHECK-ZVFH: # %bb.0:
+; CHECK-ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-ZVFH-NEXT: ret
+;
+; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv4bf16:
+; CHECK-ZVFHMIN: # %bb.0:
+; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-ZVFHMIN-NEXT: vfmv.v.f v10, fa5
+; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
+; CHECK-ZVFHMIN-NEXT: ret
+ %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %vc = select <vscale x 4 x i1> %cond, <vscale x 4 x bfloat> %splat, <vscale x 4 x bfloat> %va
+ ret <vscale x 4 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfmerge_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %cond) {
+; CHECK-LABEL: vfmerge_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: ret
+ %vc = select <vscale x 8 x i1> %cond, <vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfmerge_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %cond) {
+; CHECK-ZVFH-LABEL: vfmerge_fv_nxv8bf16:
+; CHECK-ZVFH: # %bb.0:
+; CHECK-ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-ZVFH-NEXT: ret
+;
+; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv8bf16:
+; CHECK-ZVFHMIN: # %bb.0:
+; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-ZVFHMIN-NEXT: vfmv.v.f v12, fa5
+; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
+; CHECK-ZVFHMIN-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = select <vscale x 8 x i1> %cond, <vscale x 8 x bfloat> %splat, <vscale x 8 x bfloat> %va
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfmerge_zv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %cond) {
+; CHECK-LABEL: vfmerge_zv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
+; CHECK-NEXT: ret
+ %vc = select <vscale x 8 x i1> %cond, <vscale x 8 x bfloat> splat (bfloat zeroinitializer), <vscale x 8 x bfloat> %va
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vmerge_truelhs_nxv8bf16_0(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
+; CHECK-LABEL: vmerge_truelhs_nxv8bf16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %vc = select <vscale x 8 x i1> splat (i1 1), <vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vmerge_falselhs_nxv8bf16_0(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
+; CHECK-LABEL: vmerge_falselhs_nxv8bf16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: ret
+ %vc = select <vscale x 8 x i1> zeroinitializer, <vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 16 x bfloat> @vfmerge_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %cond) {
+; CHECK-LABEL: vfmerge_vv_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT: ret
+ %vc = select <vscale x 16 x i1> %cond, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb
+ ret <vscale x 16 x bfloat> %vc
+}
+
+define <vscale x 16 x bfloat> @vfmerge_fv_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b, <vscale x 16 x i1> %cond) {
+; CHECK-ZVFH-LABEL: vfmerge_fv_nxv16bf16:
+; CHECK-ZVFH: # %bb.0:
+; CHECK-ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-ZVFH-NEXT: ret
+;
+; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv16bf16:
+; CHECK-ZVFHMIN: # %bb.0:
+; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-ZVFHMIN-NEXT: vfmv.v.f v16, fa5
+; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, mu
+; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t
+; CHECK-ZVFHMIN-NEXT: ret
+ %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %vc = select <vscale x 16 x i1> %cond, <vscale x 16 x bfloat> %splat, <vscale x 16 x bfloat> %va
+ ret <vscale x 16 x bfloat> %vc
+}
+
+define <vscale x 32 x bfloat> @vfmerge_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %cond) {
+; CHECK-LABEL: vfmerge_vv_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT: ret
+ %vc = select <vscale x 32 x i1> %cond, <vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb
+ ret <vscale x 32 x bfloat> %vc
+}
+
+define <vscale x 32 x bfloat> @vfmerge_fv_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b, <vscale x 32 x i1> %cond) {
+; CHECK-ZVFH-LABEL: vfmerge_fv_nxv32bf16:
+; CHECK-ZVFH: # %bb.0:
+; CHECK-ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-ZVFH-NEXT: ret
+;
+; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv32bf16:
+; CHECK-ZVFHMIN: # %bb.0:
+; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-ZVFHMIN-NEXT: vfmv.v.f v24, fa5
+; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v16, v24
+; CHECK-ZVFHMIN-NEXT: vmv.v.v v20, v16
+; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-ZVFHMIN-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-ZVFHMIN-NEXT: ret
+ %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %vc = select <vscale x 32 x i1> %cond, <vscale x 32 x bfloat> %splat, <vscale x 32 x bfloat> %va
+ ret <vscale x 32 x bfloat> %vc
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
index ee0617c931480..d1049e14fa29a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zfh,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zfh,+zvfhmin,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zfh,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zfh,+zvfhmin,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
declare <vscale x 1 x i1> @llvm.vp.select.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>, <vscale x 1 x i1>, i32)
@@ -922,3 +922,75 @@ define <vscale x 2 x i1> @select_unknown_T_T(<vscale x 2 x i1> %x, <vscale x 2 x
%a = call <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1> %x, <vscale x 2 x i1> %y, <vscale x 2 x i1> %y, i32 %evl)
ret <vscale x 2 x i1> %a
}
+
+declare <vscale x 1 x bfloat> @llvm.vp.select.nxv1bf16(<vscale x 1 x i1>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, i32)
+
+define <vscale x 1 x bfloat> @select_nxv1bf16(<vscale x 1 x i1> %a, <vscale x 1 x bfloat> %b, <vscale x 1 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.select.nxv1bf16(<vscale x 1 x i1> %a, <vscale x 1 x bfloat> %b, <vscale x 1 x bfloat> %c, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.select.nxv2bf16(<vscale x 2 x i1>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, i32)
+
+define <vscale x 2 x bfloat> @select_nxv2bf16(<vscale x 2 x i1> %a, <vscale x 2 x bfloat> %b, <vscale x 2 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.select.nxv2bf16(<vscale x 2 x i1> %a, <vscale x 2 x bfloat> %b, <vscale x 2 x bfloat> %c, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.select.nxv4bf16(<vscale x 4 x i1>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, i32)
+
+define <vscale x 4 x bfloat> @select_nxv4bf16(<vscale x 4 x i1> %a, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.select.nxv4bf16(<vscale x 4 x i1> %a, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.select.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
+
+define <vscale x 8 x bfloat> @select_nxv8bf16(<vscale x 8 x i1> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.select.nxv8bf16(<vscale x 8 x i1> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.vp.select.nxv16bf16(<vscale x 16 x i1>, <vscale x 16 x bfloat>, <vscale x 16 x bfloat>, i32)
+
+define <vscale x 16 x bfloat> @select_nxv16bf16(<vscale x 16 x i1> %a, <vscale x 16 x bfloat> %b, <vscale x 16 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.select.nxv16bf16(<vscale x 16 x i1> %a, <vscale x 16 x bfloat> %b, <vscale x 16 x bfloat> %c, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.vp.select.nxv32bf16(<vscale x 32 x i1>, <vscale x 32 x bfloat>, <vscale x 32 x bfloat>, i32)
+
+define <vscale x 32 x bfloat> @select_nxv32bf16(<vscale x 32 x i1> %a, <vscale x 32 x bfloat> %b, <vscale x 32 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.select.nxv32bf16(<vscale x 32 x i1> %a, <vscale x 32 x bfloat> %b, <vscale x 32 x bfloat> %c, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
More information about the llvm-commits
mailing list