[llvm] [RISCV] Support Zvfbfa codegen and C intrinsics (PR #161158)
Brandon Wu via llvm-commits
llvm-commits at lists.llvm.org
Sat Oct 18 07:41:41 PDT 2025
https://github.com/4vtomat updated https://github.com/llvm/llvm-project/pull/161158
>From f98ebcc137067c3d70a9c1b7b9dfb2f6a5765a08 Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Sat, 18 Oct 2025 07:37:04 -0700
Subject: [PATCH] [RISCV][llvm] Support Zvfbfa codegen and vsetvli insertion
spec: https://github.com/aswaterman/riscv-misc/blob/main/isa/zvfbfa.adoc
Co-authored-by: Craig Topper <craig.topper at sifive.com>
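
For readers skimming the diffstat, a minimal IR example distilled from the vfadd-bf.ll test added below (iXLen instantiated as i64 for riscv64, compiled with -mattr=+v,+experimental-zvfbfa): the bf16 vfadd intrinsic is now selected under an e16alt vtype.

; Sketch based on the new tests; not part of the patch itself.
declare <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16(
  <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, i64, i64)

define <vscale x 1 x bfloat> @vfadd_bf16(<vscale x 1 x bfloat> %x, <vscale x 1 x bfloat> %y, i64 %vl) {
  ; Roughly lowers to:
  ;   vsetvli zero, a0, e16alt, mf4, ta, ma
  ;   vfadd.vv v8, v8, v9
  ; (with fsrmi/fsrm around it for the explicit rounding-mode operand 0)
  %r = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16(
      <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> %x, <vscale x 1 x bfloat> %y,
      i64 0, i64 %vl)
  ret <vscale x 1 x bfloat> %r
}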
---
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 3 +-
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 22 +
llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td | 675 +++++++++++++++
llvm/lib/Target/RISCV/RISCVSubtarget.h | 5 +-
.../RISCV/rvv/mixed-float-bf16-arith.ll | 186 +++++
llvm/test/CodeGen/RISCV/rvv/vfadd-bf.ll | 607 ++++++++++++++
llvm/test/CodeGen/RISCV/rvv/vfclass-bf.ll | 294 +++++++
llvm/test/CodeGen/RISCV/rvv/vfmacc-bf.ll | 553 +++++++++++++
llvm/test/CodeGen/RISCV/rvv/vfmadd-bf.ll | 553 +++++++++++++
llvm/test/CodeGen/RISCV/rvv/vfmax-bf.ll | 571 +++++++++++++
llvm/test/CodeGen/RISCV/rvv/vfmerge-bf.ll | 258 ++++++
llvm/test/CodeGen/RISCV/rvv/vfmin-bf.ll | 571 +++++++++++++
llvm/test/CodeGen/RISCV/rvv/vfmsac-bf.ll | 553 +++++++++++++
llvm/test/CodeGen/RISCV/rvv/vfmsub-bf.ll | 553 +++++++++++++
llvm/test/CodeGen/RISCV/rvv/vfmul-bf.ll | 607 ++++++++++++++
llvm/test/CodeGen/RISCV/rvv/vfmv-bf-s.ll | 88 ++
llvm/test/CodeGen/RISCV/rvv/vfmv-s-bf.ll | 161 ++++
llvm/test/CodeGen/RISCV/rvv/vfmv-v-bf.ll | 216 +++++
.../test/CodeGen/RISCV/rvv/vfncvt-rod-bf-f.ll | 226 +++++
.../test/CodeGen/RISCV/rvv/vfncvt-rtz-x-bf.ll | 270 ++++++
.../CodeGen/RISCV/rvv/vfncvt-rtz-xu-bf.ll | 270 ++++++
llvm/test/CodeGen/RISCV/rvv/vfncvt-x-bf.ll | 288 +++++++
llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-bf.ll | 288 +++++++
llvm/test/CodeGen/RISCV/rvv/vfnmacc-bf.ll | 553 +++++++++++++
llvm/test/CodeGen/RISCV/rvv/vfnmadd-bf.ll | 553 +++++++++++++
llvm/test/CodeGen/RISCV/rvv/vfnmsac-bf.ll | 553 +++++++++++++
llvm/test/CodeGen/RISCV/rvv/vfnmsub-bf.ll | 553 +++++++++++++
llvm/test/CodeGen/RISCV/rvv/vfrec7-bf.ll | 282 +++++++
llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-bf16.ll | 264 ++++++
llvm/test/CodeGen/RISCV/rvv/vfrsub-bf.ll | 282 +++++++
llvm/test/CodeGen/RISCV/rvv/vfsgnj-bf.ll | 571 +++++++++++++
llvm/test/CodeGen/RISCV/rvv/vfsgnjn-bf.ll | 571 +++++++++++++
llvm/test/CodeGen/RISCV/rvv/vfsgnjx-bf.ll | 571 +++++++++++++
.../test/CodeGen/RISCV/rvv/vfslide1down-bf.ll | 288 +++++++
llvm/test/CodeGen/RISCV/rvv/vfslide1up-bf.ll | 294 +++++++
llvm/test/CodeGen/RISCV/rvv/vfsub-bf.ll | 559 +++++++++++++
llvm/test/CodeGen/RISCV/rvv/vfwadd-bf.ll | 519 ++++++++++++
llvm/test/CodeGen/RISCV/rvv/vfwadd-w-bf.ll | 773 ++++++++++++++++++
llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-x.ll | 264 ++++++
llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-xu.ll | 264 ++++++
llvm/test/CodeGen/RISCV/rvv/vfwmsac-bf.ll | 506 ++++++++++++
llvm/test/CodeGen/RISCV/rvv/vfwmul-bf.ll | 519 ++++++++++++
llvm/test/CodeGen/RISCV/rvv/vfwnmacc-bf.ll | 506 ++++++++++++
llvm/test/CodeGen/RISCV/rvv/vfwnmsac-bf.ll | 506 ++++++++++++
llvm/test/CodeGen/RISCV/rvv/vfwsub-bf.ll | 519 ++++++++++++
llvm/test/CodeGen/RISCV/rvv/vfwsub-w-bf.ll | 773 ++++++++++++++++++
llvm/test/CodeGen/RISCV/rvv/vmfeq-bf.ll | 496 +++++++++++
llvm/test/CodeGen/RISCV/rvv/vmfge-bf.ll | 496 +++++++++++
llvm/test/CodeGen/RISCV/rvv/vmfgt-bf.ll | 496 +++++++++++
llvm/test/CodeGen/RISCV/rvv/vmfle-bf.ll | 496 +++++++++++
llvm/test/CodeGen/RISCV/rvv/vmflt-bf.ll | 496 +++++++++++
llvm/test/CodeGen/RISCV/rvv/vmfne-bf.ll | 496 +++++++++++
52 files changed, 21835 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/mixed-float-bf16-arith.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfadd-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfclass-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmacc-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmadd-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmax-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmerge-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmin-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmsac-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmsub-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmul-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmv-bf-s.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmv-s-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmv-v-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-bf-f.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfncvt-x-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfnmacc-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfnmadd-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfnmsac-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfnmsub-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfrec7-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-bf16.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfrsub-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfsgnj-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfsgnjn-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfsgnjx-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfslide1down-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfslide1up-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfsub-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwadd-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwadd-w-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-x.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-xu.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwmsac-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwmul-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwnmacc-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwnmsac-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwsub-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwsub-w-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmfeq-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmfge-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmfgt-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmfle-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmflt-bf.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmfne-bf.ll
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 1b7cb9bd2169d..636e31c47ddba 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -699,7 +699,8 @@ class VSETVLIInfo {
"Can't encode VTYPE for uninitialized or unknown");
if (TWiden != 0)
return RISCVVType::encodeXSfmmVType(SEW, TWiden, AltFmt);
- return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
+ return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic,
+ AltFmt);
}
bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index ddb53a2ce62b3..12f776bbd4fa4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -3775,11 +3775,13 @@ std::string RISCVInstrInfo::createMIROperandComment(
#define CASE_VFMA_OPCODE_VV(OP) \
CASE_VFMA_OPCODE_LMULS_MF4(OP, VV, E16): \
+ case CASE_VFMA_OPCODE_LMULS_MF4(OP##_ALT, VV, E16): \
case CASE_VFMA_OPCODE_LMULS_MF2(OP, VV, E32): \
case CASE_VFMA_OPCODE_LMULS_M1(OP, VV, E64)
#define CASE_VFMA_SPLATS(OP) \
CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16, E16): \
+ case CASE_VFMA_OPCODE_LMULS_MF4(OP##_ALT, VFPR16, E16): \
case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32, E32): \
case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64, E64)
// clang-format on
@@ -4003,11 +4005,13 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
#define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VV, E16) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP##_ALT, NEWOP##_ALT, VV, E16) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32) \
CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64)
#define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16, E16) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP##_ALT, NEWOP##_ALT, VFPR16, E16) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32, E32) \
CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64, E64)
// clang-format on
@@ -4469,6 +4473,20 @@ bool RISCVInstrInfo::simplifyInstruction(MachineInstr &MI) const {
CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E32) \
CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16) \
CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E32) \
+
+#define CASE_FP_WIDEOP_OPCODE_LMULS_ALT(OP) \
+ CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF4, E16): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E16): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E16): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E16): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E16)
+
+#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(OP) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4, E16) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E16) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E16) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E16) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16)
// clang-format on
MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
@@ -4478,6 +4496,8 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
switch (MI.getOpcode()) {
default:
return nullptr;
+ case CASE_FP_WIDEOP_OPCODE_LMULS_ALT(FWADD_ALT_WV):
+ case CASE_FP_WIDEOP_OPCODE_LMULS_ALT(FWSUB_ALT_WV):
case CASE_FP_WIDEOP_OPCODE_LMULS(FWADD_WV):
case CASE_FP_WIDEOP_OPCODE_LMULS(FWSUB_WV): {
assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
@@ -4494,6 +4514,8 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
llvm_unreachable("Unexpected opcode");
CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(FWADD_WV)
CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(FWSUB_WV)
+ CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(FWADD_ALT_WV)
+ CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(FWSUB_ALT_WV)
}
// clang-format on
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
index 0be9eab6870ec..a346b763b7941 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
@@ -44,6 +44,336 @@ let Predicates = [HasStdExtZvfbfminOrZvfofp8min] in {
let mayRaiseFPException = true, Predicates = [HasStdExtZvfbfwma] in
defm PseudoVFWMACCBF16 : VPseudoVWMAC_VV_VF_BF_RM;
+defset list<VTypeInfoToWide> AllWidenableIntToBF16Vectors = {
+ def : VTypeInfoToWide<VI8MF8, VBF16MF4>;
+ def : VTypeInfoToWide<VI8MF4, VBF16MF2>;
+ def : VTypeInfoToWide<VI8MF2, VBF16M1>;
+ def : VTypeInfoToWide<VI8M1, VBF16M2>;
+ def : VTypeInfoToWide<VI8M2, VBF16M4>;
+ def : VTypeInfoToWide<VI8M4, VBF16M8>;
+}
+
+multiclass VPseudoVALU_VV_VF_RM_BF16 {
+ foreach m = MxListF in {
+ defm "" : VPseudoBinaryFV_VV_RM<m, 16/*sew*/>,
+ SchedBinary<"WriteVFALUV", "ReadVFALUV", "ReadVFALUV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoBinaryV_VF_RM<m, f, f.SEW>,
+ SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVALU_VF_RM_BF16 {
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoBinaryV_VF_RM<m, f, f.SEW>,
+ SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVFWALU_VV_VF_RM_BF16 {
+ foreach m = MxListFW in {
+ defm "" : VPseudoBinaryW_VV_RM<m, sew=16>,
+ SchedBinary<"WriteVFWALUV", "ReadVFWALUV", "ReadVFWALUV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxListFW in {
+ defm "" : VPseudoBinaryW_VF_RM<m, f, sew=f.SEW>,
+ SchedBinary<"WriteVFWALUF", "ReadVFWALUV", "ReadVFWALUF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVFWALU_WV_WF_RM_BF16 {
+ foreach m = MxListFW in {
+ defm "" : VPseudoBinaryW_WV_RM<m, sew=16>,
+ SchedBinary<"WriteVFWALUV", "ReadVFWALUV", "ReadVFWALUV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+ defvar f = SCALAR_F16;
+ foreach m = f.MxListFW in {
+ defm "" : VPseudoBinaryW_WF_RM<m, f, sew=f.SEW>,
+ SchedBinary<"WriteVFWALUF", "ReadVFWALUV", "ReadVFWALUF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVFMUL_VV_VF_RM_BF16 {
+ foreach m = MxListF in {
+ defm "" : VPseudoBinaryFV_VV_RM<m, 16/*sew*/>,
+ SchedBinary<"WriteVFMulV", "ReadVFMulV", "ReadVFMulV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoBinaryV_VF_RM<m, f, f.SEW>,
+ SchedBinary<"WriteVFMulF", "ReadVFMulV", "ReadVFMulF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVWMUL_VV_VF_RM_BF16 {
+ foreach m = MxListFW in {
+ defm "" : VPseudoBinaryW_VV_RM<m, sew=16>,
+ SchedBinary<"WriteVFWMulV", "ReadVFWMulV", "ReadVFWMulV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxListFW in {
+ defm "" : VPseudoBinaryW_VF_RM<m, f, sew=f.SEW>,
+ SchedBinary<"WriteVFWMulF", "ReadVFWMulV", "ReadVFWMulF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVMAC_VV_VF_AAXA_RM_BF16 {
+ foreach m = MxListF in {
+ defm "" : VPseudoTernaryV_VV_AAXA_RM<m, 16/*sew*/>,
+ SchedTernary<"WriteVFMulAddV", "ReadVFMulAddV", "ReadVFMulAddV",
+ "ReadVFMulAddV", m.MX, 16/*sew*/>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoTernaryV_VF_AAXA_RM<m, f, f.SEW>,
+ SchedTernary<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF",
+ "ReadVFMulAddV", m.MX, f.SEW>;
+ }
+}
+
+multiclass VPseudoVWMAC_VV_VF_RM_BF16 {
+ foreach m = MxListFW in {
+ defm "" : VPseudoTernaryW_VV_RM<m, sew=16>,
+ SchedTernary<"WriteVFWMulAddV", "ReadVFWMulAddV",
+ "ReadVFWMulAddV", "ReadVFWMulAddV", m.MX, 16/*sew*/>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxListFW in {
+ defm "" : VPseudoTernaryW_VF_RM<m, f, sew=f.SEW>,
+ SchedTernary<"WriteVFWMulAddF", "ReadVFWMulAddV",
+ "ReadVFWMulAddF", "ReadVFWMulAddV", m.MX, f.SEW>;
+ }
+}
+
+multiclass VPseudoVRCP_V_BF16 {
+ foreach m = MxListF in {
+ defvar mx = m.MX;
+ let VLMul = m.value in {
+ def "_V_" # mx # "_E16"
+ : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, 16/*sew*/,
+ forcePassthruRead=true>;
+ def "_V_" # mx # "_E16_MASK"
+ : VPseudoUnaryMask<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx = 2>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, 16/*sew*/,
+ forcePassthruRead=true>;
+ }
+ }
+}
+
+multiclass VPseudoVRCP_V_RM_BF16 {
+ foreach m = MxListF in {
+ defvar mx = m.MX;
+ let VLMul = m.value in {
+ def "_V_" # mx # "_E16"
+ : VPseudoUnaryNoMaskRoundingMode<m.vrclass, m.vrclass>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, 16/*sew*/,
+ forcePassthruRead=true>;
+ def "_V_" # mx # "_E16_MASK"
+ : VPseudoUnaryMaskRoundingMode<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx = 2>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, 16/*sew*/,
+ forcePassthruRead=true>;
+ }
+ }
+}
+
+multiclass VPseudoVMAX_VV_VF_BF16 {
+ foreach m = MxListF in {
+ defm "" : VPseudoBinaryV_VV<m, sew=16>,
+ SchedBinary<"WriteVFMinMaxV", "ReadVFMinMaxV", "ReadVFMinMaxV",
+ m.MX, 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoBinaryV_VF<m, f, f.SEW>,
+ SchedBinary<"WriteVFMinMaxF", "ReadVFMinMaxV", "ReadVFMinMaxF",
+ m.MX, f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVSGNJ_VV_VF_BF16 {
+ foreach m = MxListF in {
+ defm "" : VPseudoBinaryV_VV<m, sew=16>,
+ SchedBinary<"WriteVFSgnjV", "ReadVFSgnjV", "ReadVFSgnjV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoBinaryV_VF<m, f, f.SEW>,
+ SchedBinary<"WriteVFSgnjF", "ReadVFSgnjV", "ReadVFSgnjF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVWCVTF_V_BF16 {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListW in
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, sew=8,
+ TargetConstraintType=3>,
+ SchedUnary<"WriteVFWCvtIToFV", "ReadVFWCvtIToFV", m.MX, 8/*sew*/,
+ forcePassthruRead=true>;
+}
+
+multiclass VPseudoVWCVTD_V_BF16 {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListFW in
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, sew=16,
+ TargetConstraintType=3>,
+ SchedUnary<"WriteVFWCvtFToFV", "ReadVFWCvtFToFV", m.MX, 16/*sew*/,
+ forcePassthruRead=true>;
+}
+
+multiclass VPseudoVNCVTD_W_BF16 {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListFW in
+ defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint, sew=16,
+ TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, 16/*sew*/,
+ forcePassthruRead=true>;
+}
+
+multiclass VPseudoVNCVTD_W_RM_BF16 {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListFW in
+ defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m,
+ constraint, sew=16,
+ TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, 16/*sew*/,
+ forcePassthruRead=true>;
+}
+
+let Predicates = [HasStdExtZvfbfa], AltFmtType = IS_ALTFMT in {
+let mayRaiseFPException = true in {
+defm PseudoVFADD_ALT : VPseudoVALU_VV_VF_RM_BF16;
+defm PseudoVFSUB_ALT : VPseudoVALU_VV_VF_RM_BF16;
+defm PseudoVFRSUB_ALT : VPseudoVALU_VF_RM_BF16;
+}
+
+let mayRaiseFPException = true in {
+defm PseudoVFWADD_ALT : VPseudoVFWALU_VV_VF_RM_BF16;
+defm PseudoVFWSUB_ALT : VPseudoVFWALU_VV_VF_RM_BF16;
+defm PseudoVFWADD_ALT : VPseudoVFWALU_WV_WF_RM_BF16;
+defm PseudoVFWSUB_ALT : VPseudoVFWALU_WV_WF_RM_BF16;
+}
+
+let mayRaiseFPException = true in
+defm PseudoVFMUL_ALT : VPseudoVFMUL_VV_VF_RM_BF16;
+
+let mayRaiseFPException = true in
+defm PseudoVFWMUL_ALT : VPseudoVWMUL_VV_VF_RM_BF16;
+
+let mayRaiseFPException = true in {
+defm PseudoVFMACC_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFNMACC_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFMSAC_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFNMSAC_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFMADD_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFNMADD_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFMSUB_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFNMSUB_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+}
+
+let mayRaiseFPException = true in {
+defm PseudoVFWMACC_ALT : VPseudoVWMAC_VV_VF_RM_BF16;
+defm PseudoVFWNMACC_ALT : VPseudoVWMAC_VV_VF_RM_BF16;
+defm PseudoVFWMSAC_ALT : VPseudoVWMAC_VV_VF_RM_BF16;
+defm PseudoVFWNMSAC_ALT : VPseudoVWMAC_VV_VF_RM_BF16;
+}
+
+let mayRaiseFPException = true in
+defm PseudoVFRSQRT7_ALT : VPseudoVRCP_V_BF16;
+
+let mayRaiseFPException = true in
+defm PseudoVFREC7_ALT : VPseudoVRCP_V_RM_BF16;
+
+let mayRaiseFPException = true in {
+defm PseudoVFMIN_ALT : VPseudoVMAX_VV_VF_BF16;
+defm PseudoVFMAX_ALT : VPseudoVMAX_VV_VF_BF16;
+}
+
+defm PseudoVFSGNJ_ALT : VPseudoVSGNJ_VV_VF_BF16;
+defm PseudoVFSGNJN_ALT : VPseudoVSGNJ_VV_VF_BF16;
+defm PseudoVFSGNJX_ALT : VPseudoVSGNJ_VV_VF_BF16;
+
+let mayRaiseFPException = true in {
+defm PseudoVMFEQ_ALT : VPseudoVCMPM_VV_VF;
+defm PseudoVMFNE_ALT : VPseudoVCMPM_VV_VF;
+defm PseudoVMFLT_ALT : VPseudoVCMPM_VV_VF;
+defm PseudoVMFLE_ALT : VPseudoVCMPM_VV_VF;
+defm PseudoVMFGT_ALT : VPseudoVCMPM_VF;
+defm PseudoVMFGE_ALT : VPseudoVCMPM_VF;
+}
+
+defm PseudoVFCLASS_ALT : VPseudoVCLS_V;
+
+defm PseudoVFMERGE_ALT : VPseudoVMRG_FM;
+
+defm PseudoVFMV_V_ALT : VPseudoVMV_F;
+
+let mayRaiseFPException = true in {
+defm PseudoVFWCVT_F_XU_ALT : VPseudoVWCVTF_V_BF16;
+defm PseudoVFWCVT_F_X_ALT : VPseudoVWCVTF_V_BF16;
+
+defm PseudoVFWCVT_F_F_ALT : VPseudoVWCVTD_V_BF16;
+} // mayRaiseFPException = true
+
+let mayRaiseFPException = true in {
+let hasSideEffects = 0, hasPostISelHook = 1 in {
+defm PseudoVFNCVT_XU_F_ALT : VPseudoVNCVTI_W_RM;
+defm PseudoVFNCVT_X_F_ALT : VPseudoVNCVTI_W_RM;
+}
+
+defm PseudoVFNCVT_RTZ_XU_F_ALT : VPseudoVNCVTI_W;
+defm PseudoVFNCVT_RTZ_X_F_ALT : VPseudoVNCVTI_W;
+
+defm PseudoVFNCVT_F_F_ALT : VPseudoVNCVTD_W_RM_BF16;
+
+defm PseudoVFNCVT_ROD_F_F_ALT : VPseudoVNCVTD_W_BF16;
+} // mayRaiseFPException = true
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+ defvar f = SCALAR_F16;
+ let HasSEWOp = 1, BaseInstr = VFMV_F_S in
+ def "PseudoVFMV_" # f.FX # "_S_ALT" :
+ RISCVVPseudo<(outs f.fprclass:$rd), (ins VR:$rs2, sew:$sew)>,
+ Sched<[WriteVMovFS, ReadVMovFS]>;
+ let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VFMV_S_F, isReMaterializable = 1,
+ Constraints = "$rd = $passthru" in
+ def "PseudoVFMV_S_" # f.FX # "_ALT" :
+ RISCVVPseudo<(outs VR:$rd),
+ (ins VR:$passthru, f.fprclass:$rs1, AVL:$vl, sew:$sew)>,
+ Sched<[WriteVMovSF, ReadVMovSF_V, ReadVMovSF_F]>;
+}
+
+defm PseudoVFSLIDE1UP_ALT : VPseudoVSLD1_VF<"@earlyclobber $rd">;
+defm PseudoVFSLIDE1DOWN_ALT : VPseudoVSLD1_VF;
+} // Predicates = [HasStdExtZvfbfa], AltFmtType = IS_ALTFMT
+
//===----------------------------------------------------------------------===//
// Patterns
//===----------------------------------------------------------------------===//
@@ -87,6 +417,130 @@ let Predicates = [HasStdExtZvfbfminOrZvfofp8min] in {
FRM_DYN,
fvti.AVL, fvti.Log2SEW, TA_MA)>;
}
+
+ defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllBF16Vectors>;
+ defm : VPatBinaryV_VV_VX_VI_INT<"int_riscv_vrgather", "PseudoVRGATHER",
+ AllBF16Vectors, uimm5>;
+ defm : VPatBinaryV_VV_INT_EEW<"int_riscv_vrgatherei16_vv", "PseudoVRGATHEREI16",
+ eew=16, vtilist=AllBF16Vectors>;
+ defm : VPatTernaryV_VX_VI<"int_riscv_vslideup", "PseudoVSLIDEUP", AllBF16Vectors, uimm5>;
+ defm : VPatTernaryV_VX_VI<"int_riscv_vslidedown", "PseudoVSLIDEDOWN", AllBF16Vectors, uimm5>;
+
+ foreach fvti = AllBF16Vectors in {
+ defm : VPatBinaryCarryInTAIL<"int_riscv_vmerge", "PseudoVMERGE", "VVM",
+ fvti.Vector,
+ fvti.Vector, fvti.Vector, fvti.Mask,
+ fvti.Log2SEW, fvti.LMul, fvti.RegClass,
+ fvti.RegClass, fvti.RegClass>;
+ defm : VPatBinaryCarryInTAIL<"int_riscv_vfmerge", "PseudoVFMERGE",
+ "V"#fvti.ScalarSuffix#"M",
+ fvti.Vector,
+ fvti.Vector, fvti.Scalar, fvti.Mask,
+ fvti.Log2SEW, fvti.LMul, fvti.RegClass,
+ fvti.RegClass, fvti.ScalarRegClass>;
+ defvar instr = !cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX);
+ def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector fvti.RegClass:$passthru),
+ (fvti.Vector fvti.RegClass:$rs2),
+ (fvti.Scalar (fpimm0)),
+ (fvti.Mask VMV0:$vm), VLOpFrag)),
+ (instr fvti.RegClass:$passthru, fvti.RegClass:$rs2, 0,
+ (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>;
+
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm), fvti.RegClass:$rs1,
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask VMV0:$vm),
+ fvti.AVL, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm),
+ (SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))),
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VXM_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2, GPR:$imm, (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm),
+ (SplatFPOp (fvti.Scalar fpimm0)),
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2, 0, (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm),
+ (SplatFPOp fvti.ScalarRegClass:$rs1),
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2,
+ (fvti.Scalar fvti.ScalarRegClass:$rs1),
+ (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm),
+ fvti.RegClass:$rs1,
+ fvti.RegClass:$rs2,
+ fvti.RegClass:$passthru,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask VMV0:$vm),
+ GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm),
+ (SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))),
+ fvti.RegClass:$rs2,
+ fvti.RegClass:$passthru,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VXM_"#fvti.LMul.MX)
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, GPR:$imm, (fvti.Mask VMV0:$vm),
+ GPR:$vl, fvti.Log2SEW)>;
+
+
+ def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm),
+ (SplatFPOp (fvti.Scalar fpimm0)),
+ fvti.RegClass:$rs2,
+ fvti.RegClass:$passthru,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, 0, (fvti.Mask VMV0:$vm),
+ GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm),
+ (SplatFPOp fvti.ScalarRegClass:$rs1),
+ fvti.RegClass:$rs2,
+ fvti.RegClass:$passthru,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2,
+ (fvti.Scalar fvti.ScalarRegClass:$rs1),
+ (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector
+ (riscv_vrgather_vv_vl fvti.RegClass:$rs2,
+ (ivti.Vector fvti.RegClass:$rs1),
+ fvti.RegClass:$passthru,
+ (fvti.Mask VMV0:$vm),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VV_"# fvti.LMul.MX#"_E"# fvti.SEW#"_MASK")
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, fvti.RegClass:$rs1,
+ (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fvti.Vector (riscv_vrgather_vx_vl fvti.RegClass:$rs2, GPR:$rs1,
+ fvti.RegClass:$passthru,
+ (fvti.Mask VMV0:$vm),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VX_"# fvti.LMul.MX#"_MASK")
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, GPR:$rs1,
+ (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fvti.Vector
+ (riscv_vrgather_vx_vl fvti.RegClass:$rs2,
+ uimm5:$imm,
+ fvti.RegClass:$passthru,
+ (fvti.Mask VMV0:$vm),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VI_"# fvti.LMul.MX#"_MASK")
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, uimm5:$imm,
+ (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
}
let Predicates = [HasStdExtZvfbfwma] in {
@@ -97,3 +551,224 @@ let Predicates = [HasStdExtZvfbfwma] in {
defm : VPatWidenFPMulAccSDNode_VV_VF_RM<"PseudoVFWMACCBF16",
AllWidenableBF16ToFloatVectors>;
}
+
+multiclass VPatConversionVI_VF_BF16<string intrinsic, string instruction> {
+ foreach fvti = AllBF16Vectors in {
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<ivti>.Predicates) in
+ defm : VPatConversion<intrinsic, instruction, "V",
+ ivti.Vector, fvti.Vector, ivti.Mask, fvti.Log2SEW,
+ fvti.LMul, ivti.RegClass, fvti.RegClass>;
+ }
+}
+
+multiclass VPatConversionWF_VI_BF16<string intrinsic, string instruction,
+ bit isSEWAware = 0> {
+ foreach vtiToWti = AllWidenableIntToBF16Vectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar fwti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ defm : VPatConversion<intrinsic, instruction, "V",
+ fwti.Vector, vti.Vector, fwti.Mask, vti.Log2SEW,
+ vti.LMul, fwti.RegClass, vti.RegClass, isSEWAware>;
+ }
+}
+
+multiclass VPatConversionWF_VF_BF16<string intrinsic, string instruction,
+ bit isSEWAware = 0> {
+ foreach fvtiToFWti = AllWidenableBF16ToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = !listconcat(GetVTypeMinimalPredicates<fvti>.Predicates,
+ GetVTypeMinimalPredicates<fwti>.Predicates) in
+ defm : VPatConversion<intrinsic, instruction, "V",
+ fwti.Vector, fvti.Vector, fwti.Mask, fvti.Log2SEW,
+ fvti.LMul, fwti.RegClass, fvti.RegClass, isSEWAware>;
+ }
+}
+
+multiclass VPatConversionVI_WF_BF16<string intrinsic, string instruction> {
+ foreach vtiToWti = AllWidenableIntToBF16Vectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar fwti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ defm : VPatConversion<intrinsic, instruction, "W",
+ vti.Vector, fwti.Vector, vti.Mask, vti.Log2SEW,
+ vti.LMul, vti.RegClass, fwti.RegClass>;
+ }
+}
+
+multiclass VPatConversionVI_WF_RM_BF16<string intrinsic, string instruction> {
+ foreach vtiToWti = AllWidenableIntToBF16Vectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar fwti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ defm : VPatConversionRoundingMode<intrinsic, instruction, "W",
+ vti.Vector, fwti.Vector, vti.Mask, vti.Log2SEW,
+ vti.LMul, vti.RegClass, fwti.RegClass>;
+ }
+}
+
+multiclass VPatConversionVF_WF_BF16<string intrinsic, string instruction,
+ bit isSEWAware = 0> {
+ foreach fvtiToFWti = AllWidenableBF16ToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ defm : VPatConversion<intrinsic, instruction, "W",
+ fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW,
+ fvti.LMul, fvti.RegClass, fwti.RegClass, isSEWAware>;
+ }
+}
+
+let Predicates = [HasStdExtZvfbfa] in {
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfadd", "PseudoVFADD_ALT",
+ AllBF16Vectors, isSEWAware = 1>;
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfsub", "PseudoVFSUB_ALT",
+ AllBF16Vectors, isSEWAware = 1>;
+defm : VPatBinaryV_VX_RM<"int_riscv_vfrsub", "PseudoVFRSUB_ALT",
+ AllBF16Vectors, isSEWAware = 1>;
+defm : VPatBinaryW_VV_VX_RM<"int_riscv_vfwadd", "PseudoVFWADD_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatBinaryW_VV_VX_RM<"int_riscv_vfwsub", "PseudoVFWSUB_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatBinaryW_WV_WX_RM<"int_riscv_vfwadd_w", "PseudoVFWADD_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatBinaryW_WV_WX_RM<"int_riscv_vfwsub_w", "PseudoVFWSUB_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfmul", "PseudoVFMUL_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryW_VV_VX_RM<"int_riscv_vfwmul", "PseudoVFWMUL_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmacc", "PseudoVFMACC_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmacc", "PseudoVFNMACC_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmsac", "PseudoVFMSAC_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmsac", "PseudoVFNMSAC_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmadd", "PseudoVFMADD_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmadd", "PseudoVFNMADD_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmsub", "PseudoVFMSUB_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmsub", "PseudoVFNMSUB_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmacc", "PseudoVFWMACC_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwnmacc", "PseudoVFWNMACC_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmsac", "PseudoVFWMSAC_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwnmsac", "PseudoVFWNMSAC_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatUnaryV_V<"int_riscv_vfrsqrt7", "PseudoVFRSQRT7_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatUnaryV_V_RM<"int_riscv_vfrec7", "PseudoVFREC7_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfmin", "PseudoVFMIN_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfmax", "PseudoVFMAX_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnj", "PseudoVFSGNJ_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnjn", "PseudoVFSGNJN_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnjx", "PseudoVFSGNJX_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryM_VV_VX<"int_riscv_vmfeq", "PseudoVMFEQ_ALT", AllBF16Vectors>;
+defm : VPatBinaryM_VV_VX<"int_riscv_vmfle", "PseudoVMFLE_ALT", AllBF16Vectors>;
+defm : VPatBinaryM_VV_VX<"int_riscv_vmflt", "PseudoVMFLT_ALT", AllBF16Vectors>;
+defm : VPatBinaryM_VV_VX<"int_riscv_vmfne", "PseudoVMFNE_ALT", AllBF16Vectors>;
+defm : VPatBinaryM_VX<"int_riscv_vmfgt", "PseudoVMFGT_ALT", AllBF16Vectors>;
+defm : VPatBinaryM_VX<"int_riscv_vmfge", "PseudoVMFGE_ALT", AllBF16Vectors>;
+defm : VPatBinarySwappedM_VV<"int_riscv_vmfgt", "PseudoVMFLT_ALT", AllBF16Vectors>;
+defm : VPatBinarySwappedM_VV<"int_riscv_vmfge", "PseudoVMFLE_ALT", AllBF16Vectors>;
+defm : VPatConversionVI_VF_BF16<"int_riscv_vfclass", "PseudoVFCLASS_ALT">;
+foreach vti = AllBF16Vectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ defm : VPatBinaryCarryInTAIL<"int_riscv_vfmerge", "PseudoVFMERGE_ALT",
+ "V"#vti.ScalarSuffix#"M",
+ vti.Vector,
+ vti.Vector, vti.Scalar, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass,
+ vti.RegClass, vti.ScalarRegClass>;
+}
+defm : VPatConversionWF_VI_BF16<"int_riscv_vfwcvt_f_xu_v", "PseudoVFWCVT_F_XU_ALT",
+ isSEWAware=1>;
+defm : VPatConversionWF_VI_BF16<"int_riscv_vfwcvt_f_x_v", "PseudoVFWCVT_F_X_ALT",
+ isSEWAware=1>;
+defm : VPatConversionWF_VF_BF16<"int_riscv_vfwcvt_f_f_v", "PseudoVFWCVT_F_F_ALT",
+ isSEWAware=1>;
+defm : VPatConversionVI_WF_RM_BF16<"int_riscv_vfncvt_xu_f_w", "PseudoVFNCVT_XU_F_ALT">;
+defm : VPatConversionVI_WF_RM_BF16<"int_riscv_vfncvt_x_f_w", "PseudoVFNCVT_X_F_ALT">;
+defm : VPatConversionVI_WF_BF16<"int_riscv_vfncvt_rtz_xu_f_w", "PseudoVFNCVT_RTZ_XU_F_ALT">;
+defm : VPatConversionVI_WF_BF16<"int_riscv_vfncvt_rtz_x_f_w", "PseudoVFNCVT_RTZ_X_F_ALT">;
+defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatConversionVF_WF_BF16<"int_riscv_vfncvt_rod_f_f_w", "PseudoVFNCVT_ROD_F_F_ALT",
+ isSEWAware=1>;
+defm : VPatBinaryV_VX<"int_riscv_vfslide1up", "PseudoVFSLIDE1UP_ALT", AllBF16Vectors>;
+defm : VPatBinaryV_VX<"int_riscv_vfslide1down", "PseudoVFSLIDE1DOWN_ALT", AllBF16Vectors>;
+
+foreach fvti = AllBF16Vectors in {
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ let Predicates = GetVTypePredicates<ivti>.Predicates in {
+ // 13.16. Vector Floating-Point Move Instruction
+ // If we're splatting fpimm0, use vmv.v.x vd, x0.
+ def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
+ fvti.Vector:$passthru, (fvti.Scalar (fpimm0)), VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
+ $passthru, 0, GPR:$vl, fvti.Log2SEW, TU_MU)>;
+ def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
+ fvti.Vector:$passthru, (fvti.Scalar (SelectScalarFPAsInt (XLenVT GPR:$imm))), VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_V_X_"#fvti.LMul.MX)
+ $passthru, GPR:$imm, GPR:$vl, fvti.Log2SEW, TU_MU)>;
+ }
+
+ let Predicates = GetVTypePredicates<fvti>.Predicates in {
+ def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
+ fvti.Vector:$passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)),
+ (!cast<Instruction>("PseudoVFMV_V_ALT_" # fvti.ScalarSuffix # "_" #
+ fvti.LMul.MX)
+ $passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2),
+ GPR:$vl, fvti.Log2SEW, TU_MU)>;
+ }
+}
+
+foreach vti = NoGroupBF16Vectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru),
+ (vti.Scalar (fpimm0)),
+ VLOpFrag)),
+ (PseudoVMV_S_X $passthru, (XLenVT X0), GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru),
+ (vti.Scalar (SelectScalarFPAsInt (XLenVT GPR:$imm))),
+ VLOpFrag)),
+ (PseudoVMV_S_X $passthru, GPR:$imm, GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru),
+ vti.ScalarRegClass:$rs1,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVFMV_S_"#vti.ScalarSuffix#"_ALT")
+ vti.RegClass:$passthru,
+ (vti.Scalar vti.ScalarRegClass:$rs1), GPR:$vl, vti.Log2SEW)>;
+ }
+
+ defvar vfmv_f_s_inst = !cast<Instruction>(!strconcat("PseudoVFMV_",
+ vti.ScalarSuffix,
+ "_S_ALT"));
+ // Only pattern-match extract-element operations where the index is 0. Any
+ // other index will have been custom-lowered to slide the vector correctly
+ // into place.
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ def : Pat<(vti.Scalar (extractelt (vti.Vector vti.RegClass:$rs2), 0)),
+ (vfmv_f_s_inst vti.RegClass:$rs2, vti.Log2SEW)>;
+}
+} // Predicates = [HasStdExtZvfbfa]
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 6acf799dd00d3..334db4bfb75d9 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -288,9 +288,12 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
bool hasVInstructionsI64() const { return HasStdExtZve64x; }
bool hasVInstructionsF16Minimal() const { return HasStdExtZvfhmin; }
bool hasVInstructionsF16() const { return HasStdExtZvfh; }
- bool hasVInstructionsBF16Minimal() const { return HasStdExtZvfbfmin; }
+ bool hasVInstructionsBF16Minimal() const {
+ return HasStdExtZvfbfmin || HasStdExtZvfbfa;
+ }
bool hasVInstructionsF32() const { return HasStdExtZve32f; }
bool hasVInstructionsF64() const { return HasStdExtZve64d; }
+ bool hasVInstructionsBF16() const { return HasStdExtZvfbfa; }
// F16 and F64 both require F32.
bool hasVInstructionsAnyF() const { return hasVInstructionsF32(); }
bool hasVInstructionsFullMultiply() const { return HasStdExtV; }
diff --git a/llvm/test/CodeGen/RISCV/rvv/mixed-float-bf16-arith.ll b/llvm/test/CodeGen/RISCV/rvv/mixed-float-bf16-arith.ll
new file mode 100644
index 0000000000000..489323b323110
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/mixed-float-bf16-arith.ll
@@ -0,0 +1,186 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfh,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfh,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16(
+ <vscale x 1 x half>,
+ <vscale x 1 x half>,
+ <vscale x 1 x half>,
+ iXLen, iXLen);
+
+declare <vscale x 1 x i32> @llvm.riscv.vadd.nxv1i32.nxv1i32(
+ <vscale x 1 x i32>,
+ <vscale x 1 x i32>,
+ <vscale x 1 x i32>,
+ iXLen);
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @test_half_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2, <vscale x 1 x half> %3, <vscale x 1 x half> %4, ptr %ptr) nounwind {
+; CHECK-LABEL: test_half_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a2, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v10, v10, v11
+; CHECK-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: fsrm a2
+; CHECK-NEXT: vse16.v v10, (a1)
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16(
+ <vscale x 1 x half> poison,
+ <vscale x 1 x half> %3,
+ <vscale x 1 x half> %4,
+ iXLen 0, iXLen %2)
+
+ %b = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ call void @llvm.riscv.vse(<vscale x 1 x half> %a, ptr %ptr, iXLen %2)
+
+ ret <vscale x 1 x bfloat> %b
+}
+
+define <vscale x 1 x bfloat> @test_i32_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2, <vscale x 1 x i32> %3, <vscale x 1 x i32> %4, ptr %ptr) nounwind {
+; CHECK-LABEL: test_i32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vadd.vv v10, v10, v11
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vse32.v v10, (a1)
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vadd.nxv1i32.nxv1i32(
+ <vscale x 1 x i32> poison,
+ <vscale x 1 x i32> %3,
+ <vscale x 1 x i32> %4,
+ iXLen %2)
+
+ %b = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ call void @llvm.riscv.vse(<vscale x 1 x i32> %a, ptr %ptr, iXLen %2)
+
+ ret <vscale x 1 x bfloat> %b
+}
+
+define <vscale x 1 x bfloat> @test_half_bf16_half(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2, <vscale x 1 x half> %3, <vscale x 1 x half> %4, ptr %ptr) nounwind {
+; CHECK-LABEL: test_half_bf16_half:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a2, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v10, v10, v11
+; CHECK-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v9, v10, v11
+; CHECK-NEXT: fsrm a2
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vse16.v v9, (a1)
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16(
+ <vscale x 1 x half> poison,
+ <vscale x 1 x half> %3,
+ <vscale x 1 x half> %4,
+ iXLen 0, iXLen %2)
+
+ %b = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ %c = call <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16(
+ <vscale x 1 x half> poison,
+ <vscale x 1 x half> %a,
+ <vscale x 1 x half> %4,
+ iXLen 0, iXLen %2)
+
+ store <vscale x 1 x half> %c, ptr %ptr
+
+ ret <vscale x 1 x bfloat> %b
+}
+
+define <vscale x 1 x bfloat> @test_bf16_half_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2, <vscale x 1 x half> %3, <vscale x 1 x half> %4, ptr %ptr) nounwind {
+; CHECK-LABEL: test_bf16_half_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a2, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v10, v10, v11
+; CHECK-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: fsrm a2
+; CHECK-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
+; CHECK-NEXT: vse16.v v10, (a1)
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ %b = call <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16(
+ <vscale x 1 x half> poison,
+ <vscale x 1 x half> %3,
+ <vscale x 1 x half> %4,
+ iXLen 0, iXLen %2)
+
+ %c = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %a,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ store <vscale x 1 x half> %b, ptr %ptr
+
+ ret <vscale x 1 x bfloat> %c
+}
+
+define <vscale x 1 x bfloat> @test_bf16_i16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2, <vscale x 1 x i16> %3, <vscale x 1 x i16> %4, ptr %ptr) nounwind {
+; CHECK-LABEL: test_bf16_i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a2, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: vadd.vv v9, v10, v11
+; CHECK-NEXT: fsrm a2
+; CHECK-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
+; CHECK-NEXT: vse16.v v9, (a1)
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ %b = call <vscale x 1 x i16> @llvm.riscv.vadd.nxv1i16.nxv1i16(
+ <vscale x 1 x i16> poison,
+ <vscale x 1 x i16> %3,
+ <vscale x 1 x i16> %4,
+ iXLen %2)
+
+ store <vscale x 1 x i16> %b, ptr %ptr
+
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-bf.ll
new file mode 100644
index 0000000000000..db1b081258d5f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-bf.ll
@@ -0,0 +1,607 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfadd_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfadd.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfadd.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfadd_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfadd.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfadd.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfadd.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfadd.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfadd_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfadd.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfadd.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfadd.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfadd.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfadd_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfadd.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfadd.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfadd.vv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfadd.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfadd.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfadd_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfadd.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfadd.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfadd.vv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfadd.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfadd.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfadd_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfadd.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfadd.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfadd_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl8re16.v v24, (a0)
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu
+; CHECK-NEXT: vfadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfadd.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x bfloat> %2,
+ <vscale x 32 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfadd_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfadd.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfadd.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfadd_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfadd.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfadd_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfadd.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfadd.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfadd.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfadd_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfadd.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfadd.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfadd_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfadd.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfadd.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfadd.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfadd_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfadd.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfadd.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfadd_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfadd.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfadd.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfadd.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfadd_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfadd.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfadd.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfadd.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfadd_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfadd.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfadd.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfadd.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfadd_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfadd.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfadd.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfadd.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfadd_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfadd.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfadd.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfadd.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfadd_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfadd.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfadd.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfclass-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfclass-bf.ll
new file mode 100644
index 0000000000000..d7d49b379b5a4
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfclass-bf.ll
@@ -0,0 +1,294 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i16> @llvm.riscv.vfclass.nxv1i16.nxv1bf16(
+ <vscale x 1 x i16>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i16> @intrinsic_vfclass_v_nxv1i16_nxv1bf16(
+; CHECK-LABEL: intrinsic_vfclass_v_nxv1i16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfclass.v v8, v8
+; CHECK-NEXT: ret
+ <vscale x 1 x bfloat> %0,
+ iXLen %1) nounwind {
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vfclass.nxv1i16.nxv1bf16(
+ <vscale x 1 x i16> poison,
+ <vscale x 1 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vfclass.mask.nxv1i16.nxv1bf16(
+ <vscale x 1 x i16>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 1 x i16> @intrinsic_vfclass_mask_v_nxv1i16_nxv1bf16(
+; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv1i16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfclass.v v8, v9, v0.t
+; CHECK-NEXT: ret
+ <vscale x 1 x i16> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3) nounwind {
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vfclass.mask.nxv1i16.nxv1bf16(
+ <vscale x 1 x i16> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vfclass.nxv2i16.nxv2bf16(
+ <vscale x 2 x i16>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x i16> @intrinsic_vfclass_v_nxv2i16_nxv2bf16(
+; CHECK-LABEL: intrinsic_vfclass_v_nxv2i16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfclass.v v8, v8
+; CHECK-NEXT: ret
+ <vscale x 2 x bfloat> %0,
+ iXLen %1) nounwind {
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vfclass.nxv2i16.nxv2bf16(
+ <vscale x 2 x i16> poison,
+ <vscale x 2 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 2 x i16> %a
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vfclass.mask.nxv2i16.nxv2bf16(
+ <vscale x 2 x i16>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 2 x i16> @intrinsic_vfclass_mask_v_nxv2i16_nxv2bf16(
+; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv2i16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfclass.v v8, v9, v0.t
+; CHECK-NEXT: ret
+ <vscale x 2 x i16> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3) nounwind {
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vfclass.mask.nxv2i16.nxv2bf16(
+ <vscale x 2 x i16> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x i16> %a
+}
+
+declare <vscale x 4 x i16> @llvm.riscv.vfclass.nxv4i16.nxv4bf16(
+ <vscale x 4 x i16>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x i16> @intrinsic_vfclass_v_nxv4i16_nxv4bf16(
+; CHECK-LABEL: intrinsic_vfclass_v_nxv4i16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfclass.v v8, v8
+; CHECK-NEXT: ret
+ <vscale x 4 x bfloat> %0,
+ iXLen %1) nounwind {
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vfclass.nxv4i16.nxv4bf16(
+ <vscale x 4 x i16> poison,
+ <vscale x 4 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x i16> %a
+}
+
+declare <vscale x 4 x i16> @llvm.riscv.vfclass.mask.nxv4i16.nxv4bf16(
+ <vscale x 4 x i16>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 4 x i16> @intrinsic_vfclass_mask_v_nxv4i16_nxv4bf16(
+; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv4i16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfclass.v v8, v9, v0.t
+; CHECK-NEXT: ret
+ <vscale x 4 x i16> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3) nounwind {
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vfclass.mask.nxv4i16.nxv4bf16(
+ <vscale x 4 x i16> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x i16> %a
+}
+
+declare <vscale x 8 x i16> @llvm.riscv.vfclass.nxv8i16.nxv8bf16(
+ <vscale x 8 x i16>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x i16> @intrinsic_vfclass_v_nxv8i16_nxv8bf16(
+; CHECK-LABEL: intrinsic_vfclass_v_nxv8i16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfclass.v v8, v8
+; CHECK-NEXT: ret
+ <vscale x 8 x bfloat> %0,
+ iXLen %1) nounwind {
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vfclass.nxv8i16.nxv8bf16(
+ <vscale x 8 x i16> poison,
+ <vscale x 8 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x i16> %a
+}
+
+declare <vscale x 8 x i16> @llvm.riscv.vfclass.mask.nxv8i16.nxv8bf16(
+ <vscale x 8 x i16>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 8 x i16> @intrinsic_vfclass_mask_v_nxv8i16_nxv8bf16(
+; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv8i16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfclass.v v8, v10, v0.t
+; CHECK-NEXT: ret
+ <vscale x 8 x i16> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3) nounwind {
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vfclass.mask.nxv8i16.nxv8bf16(
+ <vscale x 8 x i16> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x i16> %a
+}
+
+declare <vscale x 16 x i16> @llvm.riscv.vfclass.nxv16i16.nxv16bf16(
+ <vscale x 16 x i16>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x i16> @intrinsic_vfclass_v_nxv16i16_nxv16bf16(
+; CHECK-LABEL: intrinsic_vfclass_v_nxv16i16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfclass.v v8, v8
+; CHECK-NEXT: ret
+ <vscale x 16 x bfloat> %0,
+ iXLen %1) nounwind {
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vfclass.nxv16i16.nxv16bf16(
+ <vscale x 16 x i16> poison,
+ <vscale x 16 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x i16> %a
+}
+
+declare <vscale x 16 x i16> @llvm.riscv.vfclass.mask.nxv16i16.nxv16bf16(
+ <vscale x 16 x i16>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 16 x i16> @intrinsic_vfclass_mask_v_nxv16i16_nxv16bf16(
+; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv16i16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfclass.v v8, v12, v0.t
+; CHECK-NEXT: ret
+ <vscale x 16 x i16> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3) nounwind {
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vfclass.mask.nxv16i16.nxv16bf16(
+ <vscale x 16 x i16> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x i16> %a
+}
+
+declare <vscale x 32 x i16> @llvm.riscv.vfclass.nxv32i16.nxv32bf16(
+ <vscale x 32 x i16>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x i16> @intrinsic_vfclass_v_nxv32i16_nxv32bf16(
+; CHECK-LABEL: intrinsic_vfclass_v_nxv32i16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfclass.v v8, v8
+; CHECK-NEXT: ret
+ <vscale x 32 x bfloat> %0,
+ iXLen %1) nounwind {
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vfclass.nxv32i16.nxv32bf16(
+ <vscale x 32 x i16> poison,
+ <vscale x 32 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 32 x i16> %a
+}
+
+declare <vscale x 32 x i16> @llvm.riscv.vfclass.mask.nxv32i16.nxv32bf16(
+ <vscale x 32 x i16>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 32 x i16> @intrinsic_vfclass_mask_v_nxv32i16_nxv32bf16(
+; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv32i16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, tu, mu
+; CHECK-NEXT: vfclass.v v8, v16, v0.t
+; CHECK-NEXT: ret
+ <vscale x 32 x i16> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3) nounwind {
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vfclass.mask.nxv32i16.nxv32bf16(
+ <vscale x 32 x i16> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 32 x i16> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmacc-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmacc-bf.ll
new file mode 100644
index 0000000000000..13821d745846f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmacc-bf.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfmacc.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmacc.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmacc_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfmacc.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmacc.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmacc_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfmacc.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmacc.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmacc.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmacc_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfmacc.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmacc.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmacc.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmacc_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmacc.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmacc.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmacc.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmacc_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfmacc.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmacc.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmacc.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmacc_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfmacc.vv v8, v10, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmacc.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmacc.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmacc_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfmacc.vv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmacc.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmacc.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmacc_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfmacc.vv v8, v12, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmacc.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmacc.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmacc_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfmacc.vv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmacc.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmacc_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfmacc.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmacc.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmacc_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmacc.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmacc_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfmacc.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmacc.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmacc.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmacc_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmacc.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmacc.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmacc_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmacc.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmacc.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmacc.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmacc_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmacc.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmacc.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmacc_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfmacc.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmacc.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmacc.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmacc_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfmacc.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmacc.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmacc.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmacc_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfmacc.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmacc.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmacc.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmacc_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfmacc.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmacc.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmadd.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmadd.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmacc_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1bf16_bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %2,
+ bfloat %1,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-bf.ll
new file mode 100644
index 0000000000000..09fc199c29d23
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-bf.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfmadd.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmadd.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmadd.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmadd_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfmadd.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmadd.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmadd.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmadd.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmadd.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmadd_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmadd.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmadd.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmadd.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmadd.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmadd.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmadd_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfmadd.vv v8, v10, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmadd.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmadd.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfmadd.vv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmadd.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmadd.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmadd_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfmadd.vv v8, v12, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmadd.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmadd.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfmadd.vv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmadd.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmadd_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmadd.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmadd_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmadd.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmadd_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmadd.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmadd.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmadd_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmadd.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmadd.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmadd_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmadd.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmadd.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmadd_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmadd.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmadd.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmadd_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfmadd.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmadd.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmadd.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmadd_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmadd.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmadd.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmadd_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfmadd.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmadd.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmadd.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmadd_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfmadd.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmadd.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmadd.vv v8, v9, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmacc.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmadd_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1bf16_bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmacc.vf v8, fa0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %2,
+ bfloat %1,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-bf.ll
new file mode 100644
index 0000000000000..a337d3061ce78
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-bf.ll
@@ -0,0 +1,571 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmax.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmax_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmax.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmax.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmax_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfmax.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmax.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmax.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmax_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmax.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmax.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmax_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfmax.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmax.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmax.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmax_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmax.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmax.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmax_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfmax.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmax.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmax.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmax_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmax.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmax.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmax_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfmax.vv v8, v10, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmax.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmax.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmax_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmax.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmax.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmax_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfmax.vv v8, v12, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmax.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmax.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmax_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmax.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmax.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmax_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl8re16.v v24, (a0)
+; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu
+; CHECK-NEXT: vfmax.vv v8, v16, v24, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmax.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x bfloat> %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmax.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmax_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmax.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmax.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmax.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmax_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmax.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmax.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmax_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmax.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmax.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmax.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmax_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmax.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmax.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmax_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmax.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmax.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmax.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmax_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmax.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmax.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmax_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmax.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmax.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmax.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmax_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfmax.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmax.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmax.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmax_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmax.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmax.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmax.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmax_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfmax.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmax.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmax.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmax_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmax.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmax.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmax.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmax_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfmax.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmax.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmerge-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmerge-bf.ll
new file mode 100644
index 0000000000000..86ba7c7fb7fe6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmerge-bf.ll
@@ -0,0 +1,258 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmerge.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmerge_vfm_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmerge.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmerge.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmerge_vfm_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmerge.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmerge.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmerge_vfm_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmerge.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmerge.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmerge_vfm_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmerge.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmerge.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmerge_vfm_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmerge.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmerge.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmerge_vfm_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmerge.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmerge_vzm_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.h.x fa5, zero
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmerge.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat zeroinitializer,
+ <vscale x 1 x i1> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 2 x bfloat> @intrinsic_vfmerge_vzm_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.h.x fa5, zero
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmerge.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat zeroinitializer,
+ <vscale x 2 x i1> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+define <vscale x 4 x bfloat> @intrinsic_vfmerge_vzm_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.h.x fa5, zero
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmerge.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat zeroinitializer,
+ <vscale x 4 x i1> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+define <vscale x 8 x bfloat> @intrinsic_vfmerge_vzm_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.h.x fa5, zero
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmerge.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat zeroinitializer,
+ <vscale x 8 x i1> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+define <vscale x 16 x bfloat> @intrinsic_vfmerge_vzm_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.h.x fa5, zero
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmerge.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat zeroinitializer,
+ <vscale x 16 x i1> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 32 x bfloat> @intrinsic_vfmerge_vzm_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.h.x fa5, zero
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmerge.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat zeroinitializer,
+ <vscale x 32 x i1> %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-bf.ll
new file mode 100644
index 0000000000000..37c0cf506a6fa
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-bf.ll
@@ -0,0 +1,571 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmin.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmin_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmin.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmin.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmin_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfmin.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmin.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmin.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmin_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmin.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmin.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmin_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfmin.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmin.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmin.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmin_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmin.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmin.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmin_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfmin.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmin.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmin.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmin_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmin.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmin.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmin_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfmin.vv v8, v10, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmin.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmin.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmin_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmin.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmin.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmin_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfmin.vv v8, v12, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmin.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmin.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmin_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmin.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmin.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmin_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl8re16.v v24, (a0)
+; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu
+; CHECK-NEXT: vfmin.vv v8, v16, v24, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmin.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x bfloat> %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmin.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmin_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmin.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmin.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmin.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmin_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmin.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmin.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmin_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmin.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmin.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmin.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmin_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmin.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmin.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmin_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmin.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmin.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmin.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmin_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmin.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmin.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmin_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmin.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmin.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmin.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmin_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfmin.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmin.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmin.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmin_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmin.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmin.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmin.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmin_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfmin.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmin.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmin.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmin_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmin.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmin.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmin.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmin_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfmin.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmin.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsac-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsac-bf.ll
new file mode 100644
index 0000000000000..948d2196f2bb4
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsac-bf.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfmsac.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmsac.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsac_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmsac.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmsac_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfmsac.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsac.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmsac.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmsac_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsac.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmsac.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmsac_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmsac.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsac.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmsac.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmsac_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsac.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmsac.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmsac_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfmsac.vv v8, v10, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsac.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmsac.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmsac_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfmsac.vv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsac.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmsac.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmsac_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfmsac.vv v8, v12, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsac.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmsac.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmsac_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfmsac.vv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsac.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsac_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfmsac.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmsac.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsac_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmsac.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmsac_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfmsac.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsac.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmsac.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmsac_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsac.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmsac.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmsac_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmsac.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsac.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmsac.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmsac_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsac.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmsac.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmsac_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfmsac.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsac.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmsac.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmsac_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfmsac.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsac.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmsac.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmsac_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfmsac.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsac.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmsac.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmsac_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfmsac.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsac.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmsub.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmsub.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsac_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1bf16_bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmsub.vf v8, fa0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %2,
+ bfloat %1,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-bf.ll
new file mode 100644
index 0000000000000..6838f37339e98
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-bf.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfmsub.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmsub.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfmsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmsub.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmsub_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfmsub.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsub.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmsub.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfmsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsub.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmsub.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmsub_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmsub.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsub.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmsub.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfmsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsub.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmsub.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmsub_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfmsub.vv v8, v10, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsub.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmsub.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfmsub.vv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsub.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmsub.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmsub_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfmsub.vv v8, v12, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsub.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmsub.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfmsub.vv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsub.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsub_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfmsub.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmsub.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsub_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmsub.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmsub_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfmsub.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsub.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmsub.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmsub_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsub.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmsub.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmsub_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmsub.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsub.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmsub.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmsub_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsub.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmsub.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmsub_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfmsub.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsub.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmsub.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmsub_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfmsub.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsub.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmsub.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmsub_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfmsub.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsub.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmsub.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmsub_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfmsub.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsub.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmsub.vv v8, v9, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmsac.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmsub_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1bf16_bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmsac.vf v8, fa0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %2,
+ bfloat %1,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-bf.ll
new file mode 100644
index 0000000000000..44bce723c39d4
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-bf.ll
@@ -0,0 +1,607 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmul.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmul_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmul.vv v8, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmul.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmul.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmul_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfmul.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmul.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmul.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmul_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmul.vv v8, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmul.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmul.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmul_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfmul.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmul.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmul.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmul_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmul.vv v8, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmul.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmul.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmul_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfmul.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmul.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmul.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmul_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmul.vv v8, v8, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmul.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmul.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmul_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfmul.vv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmul.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmul.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmul_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmul.vv v8, v8, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmul.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmul.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmul_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfmul.vv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmul.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmul.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmul_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmul.vv v8, v8, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmul.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmul.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmul_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl8re16.v v24, (a0)
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu
+; CHECK-NEXT: vfmul.vv v8, v16, v24, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmul.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x bfloat> %2,
+ <vscale x 32 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmul.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmul_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmul.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmul.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmul_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmul.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmul.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmul_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmul.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmul.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmul_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmul.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmul.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmul_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmul.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmul.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmul_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmul.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmul.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmul_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmul.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmul.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmul_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfmul.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmul.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmul.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmul_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmul.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmul.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmul_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfmul.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmul.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmul.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmul_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmul.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmul.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmul_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfmul.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmul.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv-bf-s.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv-bf-s.ll
new file mode 100644
index 0000000000000..fbc7311945c8b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmv-bf-s.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+d,+v,+experimental-zvfbfa -target-abi lp64d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+v,+experimental-zvfbfa -target-abi ilp32d -verify-machineinstrs < %s | FileCheck %s
+
+declare bfloat @llvm.riscv.vfmv.f.s.nxv1bf16(<vscale x 1 x bfloat>)
+
+define bfloat @intrinsic_vfmv.f.s_s_nxv1bf16(<vscale x 1 x bfloat> %0) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
+; CHECK-NEXT: ret
+entry:
+ %a = call bfloat @llvm.riscv.vfmv.f.s.nxv1bf16(<vscale x 1 x bfloat> %0)
+ ret bfloat %a
+}
+
+declare bfloat @llvm.riscv.vfmv.f.s.nxv2bf16(<vscale x 2 x bfloat>)
+
+define bfloat @intrinsic_vfmv.f.s_s_nxv2bf16(<vscale x 2 x bfloat> %0) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
+; CHECK-NEXT: ret
+entry:
+ %a = call bfloat @llvm.riscv.vfmv.f.s.nxv2bf16(<vscale x 2 x bfloat> %0)
+ ret bfloat %a
+}
+
+declare bfloat @llvm.riscv.vfmv.f.s.nxv4bf16(<vscale x 4 x bfloat>)
+
+define bfloat @intrinsic_vfmv.f.s_s_nxv4bf16(<vscale x 4 x bfloat> %0) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
+; CHECK-NEXT: ret
+entry:
+ %a = call bfloat @llvm.riscv.vfmv.f.s.nxv4bf16(<vscale x 4 x bfloat> %0)
+ ret bfloat %a
+}
+
+declare bfloat @llvm.riscv.vfmv.f.s.nxv8bf16(<vscale x 8 x bfloat>)
+
+define bfloat @intrinsic_vfmv.f.s_s_nxv8bf16(<vscale x 8 x bfloat> %0) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
+; CHECK-NEXT: ret
+entry:
+ %a = call bfloat @llvm.riscv.vfmv.f.s.nxv8bf16(<vscale x 8 x bfloat> %0)
+ ret bfloat %a
+}
+
+declare bfloat @llvm.riscv.vfmv.f.s.nxv16bf16(<vscale x 16 x bfloat>)
+
+define bfloat @intrinsic_vfmv.f.s_s_nxv16bf16(<vscale x 16 x bfloat> %0) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
+; CHECK-NEXT: ret
+entry:
+ %a = call bfloat @llvm.riscv.vfmv.f.s.nxv16bf16(<vscale x 16 x bfloat> %0)
+ ret bfloat %a
+}
+
+declare bfloat @llvm.riscv.vfmv.f.s.nxv32bf16(<vscale x 32 x bfloat>)
+
+define bfloat @intrinsic_vfmv.f.s_s_nxv32bf16(<vscale x 32 x bfloat> %0) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
+; CHECK-NEXT: ret
+entry:
+ %a = call bfloat @llvm.riscv.vfmv.f.s.nxv32bf16(<vscale x 32 x bfloat> %0)
+ ret bfloat %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv-s-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv-s-bf.ll
new file mode 100644
index 0000000000000..a810809fca515
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmv-s-bf.ll
@@ -0,0 +1,161 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s --check-prefixes=CHECK
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmv.s.f.nxv1bf16(<vscale x 1 x bfloat>, bfloat, iXLen)
+
+define <vscale x 1 x bfloat> @intrinsic_vfmv.s.f_f_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmv.s.f.nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2)
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmv.s.f.nxv2bf16(<vscale x 2 x bfloat>, bfloat, iXLen)
+
+define <vscale x 2 x bfloat> @intrinsic_vfmv.s.f_f_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmv.s.f.nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2)
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmv.s.f.nxv4bf16(<vscale x 4 x bfloat>, bfloat, iXLen)
+
+define <vscale x 4 x bfloat> @intrinsic_vfmv.s.f_f_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmv.s.f.nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2)
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmv.s.f.nxv8bf16(<vscale x 8 x bfloat>, bfloat, iXLen)
+
+define <vscale x 8 x bfloat> @intrinsic_vfmv.s.f_f_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmv.s.f.nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2)
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmv.s.f.nxv16bf16(<vscale x 16 x bfloat>, bfloat, iXLen)
+
+define <vscale x 16 x bfloat> @intrinsic_vfmv.s.f_f_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmv.s.f.nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2)
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmv.s.f.nxv32bf16(<vscale x 32 x bfloat>, bfloat, iXLen)
+
+define <vscale x 32 x bfloat> @intrinsic_vfmv.s.f_f_nxv32bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmv.s.f.nxv32bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2)
+ ret <vscale x 32 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmv.s.f.nxv1bf16(<vscale x 1 x bfloat> %0, bfloat 0.0, iXLen %1)
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 2 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmv.s.f.nxv2bf16(<vscale x 2 x bfloat> %0, bfloat 0.0, iXLen %1)
+ ret <vscale x 2 x bfloat> %a
+}
+
+define <vscale x 4 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmv.s.f.nxv4bf16(<vscale x 4 x bfloat> %0, bfloat 0.0, iXLen %1)
+ ret <vscale x 4 x bfloat> %a
+}
+
+define <vscale x 8 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmv.s.f.nxv8bf16(<vscale x 8 x bfloat> %0, bfloat 0.0, iXLen %1)
+ ret <vscale x 8 x bfloat> %a
+}
+
+define <vscale x 16 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmv.s.f.nxv16bf16(<vscale x 16 x bfloat> %0, bfloat 0.0, iXLen %1)
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 32 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmv.s.f.nxv32bf16(<vscale x 32 x bfloat> %0, bfloat 0.0, iXLen %1)
+ ret <vscale x 32 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmv.s.f_f_nxv1bf16_negzero(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv1bf16_negzero:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a1, 1048568
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmv.s.f.nxv1bf16(<vscale x 1 x bfloat> %0, bfloat -0.0, iXLen %1)
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv-v-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv-v-bf.ll
new file mode 100644
index 0000000000000..f3293ddc83ef9
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmv-v-bf.ll
@@ -0,0 +1,216 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmv.v.f.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmv.v.f_f_nxv1bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmv.v.f.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ bfloat %0,
+ iXLen %1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmv.v.f.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmv.v.f_f_nxv2bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmv.v.f.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ bfloat %0,
+ iXLen %1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmv.v.f.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmv.v.f_f_nxv4bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmv.v.f.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ bfloat %0,
+ iXLen %1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmv.v.f.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmv.v.f_f_nxv8bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmv.v.f.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ bfloat %0,
+ iXLen %1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmv.v.f.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmv.v.f_f_nxv16bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmv.v.f.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ bfloat %0,
+ iXLen %1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmv.v.f.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmv.v.f_f_nxv32bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmv.v.f.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ bfloat %0,
+ iXLen %1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmv.v.f_zero_nxv1bf16(iXLen %0) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_zero_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmv.v.f.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ bfloat 0.0,
+ iXLen %0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 2 x bfloat> @intrinsic_vmv.v.i_zero_nxv2bf16(iXLen %0) nounwind {
+; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmv.v.f.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ bfloat 0.0,
+ iXLen %0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+define <vscale x 4 x bfloat> @intrinsic_vmv.v.i_zero_nxv4bf16(iXLen %0) nounwind {
+; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmv.v.f.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ bfloat 0.0,
+ iXLen %0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+define <vscale x 8 x bfloat> @intrinsic_vmv.v.i_zero_nxv8bf16(iXLen %0) nounwind {
+; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmv.v.f.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ bfloat 0.0,
+ iXLen %0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+define <vscale x 16 x bfloat> @intrinsic_vmv.v.i_zero_nxv16bf16(iXLen %0) nounwind {
+; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmv.v.f.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ bfloat 0.0,
+ iXLen %0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 32 x bfloat> @intrinsic_vmv.v.i_zero_nxv32bf16(iXLen %0) nounwind {
+; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmv.v.f.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ bfloat 0.0,
+ iXLen %0)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-bf-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-bf-f.ll
new file mode 100644
index 0000000000000..7d587fd55cd83
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-bf-f.ll
@@ -0,0 +1,226 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv1bf16.nxv1f32(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x float>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfncvt_rod.f.f.w_nxv1bf16_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv1bf16_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv1bf16.nxv1f32(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x float> %0,
+ iXLen %1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv1bf16.nxv1f32(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x float>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfncvt_mask_rod.f.f.w_nxv1bf16_nxv1f32(<vscale x 1 x bfloat> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv1bf16_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv1bf16.nxv1f32(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x float> %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv2bf16.nxv2f32(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x float>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfncvt_rod.f.f.w_nxv2bf16_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv2bf16_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv2bf16.nxv2f32(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x float> %0,
+ iXLen %1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv2bf16.nxv2f32(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x float>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfncvt_mask_rod.f.f.w_nxv2bf16_nxv2f32(<vscale x 2 x bfloat> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv2bf16_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv2bf16.nxv2f32(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x float> %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv4bf16.nxv4f32(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x float>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfncvt_rod.f.f.w_nxv4bf16_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv4bf16_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v10, v8
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv4bf16.nxv4f32(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x float> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv4bf16.nxv4f32(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x float>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfncvt_mask_rod.f.f.w_nxv4bf16_nxv4f32(<vscale x 4 x bfloat> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv4bf16_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv4bf16.nxv4f32(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x float> %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv8bf16.nxv8f32(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x float>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfncvt_rod.f.f.w_nxv8bf16_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv8bf16_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v12, v8
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv8bf16.nxv8f32(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x float> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv8bf16.nxv8f32(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x float>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfncvt_mask_rod.f.f.w_nxv8bf16_nxv8f32(<vscale x 8 x bfloat> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv8bf16_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv8bf16.nxv8f32(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x float> %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv16bf16.nxv16f32(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x float>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfncvt_rod.f.f.w_nxv16bf16_nxv16f32(<vscale x 16 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv16bf16_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v16, v8
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv16bf16.nxv16f32(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x float> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv16bf16.nxv16f32(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x float>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfncvt_mask_rod.f.f.w_nxv16bf16_nxv16f32(<vscale x 16 x bfloat> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv16bf16_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv16bf16.nxv16f32(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x float> %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-bf.ll
new file mode 100644
index 0000000000000..ee9e3d1b9f630
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-bf.ll
@@ -0,0 +1,270 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv1i8_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> poison,
+ <vscale x 1 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i8_nxv1bf16(<vscale x 1 x i8> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv2i8_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> poison,
+ <vscale x 2 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i8_nxv2bf16(<vscale x 2 x i8> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv4i8_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> poison,
+ <vscale x 4 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i8_nxv4bf16(<vscale x 4 x i8> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v8
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> poison,
+ <vscale x 8 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i8_nxv8bf16(<vscale x 8 x i8> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv16i8_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v8
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> poison,
+ <vscale x 16 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i8_nxv16bf16(<vscale x 16 x i8> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv32i8_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v8
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> poison,
+ <vscale x 32 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv32i8_nxv32bf16(<vscale x 32 x i8> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x i8> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-bf.ll
new file mode 100644
index 0000000000000..521f7274dc5c9
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-bf.ll
@@ -0,0 +1,270 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv1i8_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> poison,
+ <vscale x 1 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i8_nxv1bf16(<vscale x 1 x i8> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv2i8_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> poison,
+ <vscale x 2 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i8_nxv2bf16(<vscale x 2 x i8> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv4i8_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> poison,
+ <vscale x 4 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i8_nxv4bf16(<vscale x 4 x i8> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v10, v8
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> poison,
+ <vscale x 8 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i8_nxv8bf16(<vscale x 8 x i8> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv16i8_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v8
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> poison,
+ <vscale x 16 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i8_nxv16bf16(<vscale x 16 x i8> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv32i8_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v16, v8
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> poison,
+ <vscale x 32 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv32i8_nxv32bf16(<vscale x 32 x i8> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x i8> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-bf.ll
new file mode 100644
index 0000000000000..ab9ebade287e6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-bf.ll
@@ -0,0 +1,288 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_x.f.w_nxv1i8_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> poison,
+ <vscale x 1 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv1i8_nxv1bf16(<vscale x 1 x i8> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_x.f.w_nxv2i8_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> poison,
+ <vscale x 2 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv2i8_nxv2bf16(<vscale x 2 x i8> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_x.f.w_nxv4i8_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> poison,
+ <vscale x 4 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv4i8_nxv4bf16(<vscale x 4 x i8> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_x.f.w_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v10, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> poison,
+ <vscale x 8 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv8i8_nxv8bf16(<vscale x 8 x i8> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_x.f.w_nxv16i8_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v12, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> poison,
+ <vscale x 16 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv16i8_nxv16bf16(<vscale x 16 x i8> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_x.f.w_nxv32i8_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v16, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> poison,
+ <vscale x 32 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv32i8_nxv32bf16(<vscale x 32 x i8> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x i8> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-bf.ll
new file mode 100644
index 0000000000000..61c6803ce12bd
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-bf.ll
@@ -0,0 +1,288 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_xu.f.w_nxv1i8_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> poison,
+ <vscale x 1 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv1i8_nxv1bf16(<vscale x 1 x i8> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_xu.f.w_nxv2i8_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> poison,
+ <vscale x 2 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv2i8_nxv2bf16(<vscale x 2 x i8> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_xu.f.w_nxv4i8_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> poison,
+ <vscale x 4 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv4i8_nxv4bf16(<vscale x 4 x i8> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_xu.f.w_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v10, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> poison,
+ <vscale x 8 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv8i8_nxv8bf16(<vscale x 8 x i8> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_xu.f.w_nxv16i8_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v12, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> poison,
+ <vscale x 16 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv16i8_nxv16bf16(<vscale x 16 x i8> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_xu.f.w_nxv32i8_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v16, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> poison,
+ <vscale x 32 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv32i8_nxv32bf16(<vscale x 32 x i8> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x i8> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-bf.ll
new file mode 100644
index 0000000000000..4b4091ba7acbe
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-bf.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfnmacc.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmacc_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfnmacc.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmacc_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfnmacc.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmacc_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfnmacc.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmacc_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfnmacc.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmacc_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfnmacc.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmacc_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfnmacc.vv v8, v10, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmacc_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfnmacc.vv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmacc_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfnmacc.vv v8, v12, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmacc_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfnmacc.vv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmacc_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmacc_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmacc_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmacc_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmacc_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmacc_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmacc_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmacc_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmacc_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmacc_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmadd.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmadd.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmacc_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1bf16_bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %2,
+ bfloat %1,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-bf.ll
new file mode 100644
index 0000000000000..2bb6bf5ae9e26
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-bf.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfnmadd.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmadd_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfnmadd.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmadd_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfnmadd.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmadd_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfnmadd.vv v8, v10, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfnmadd.vv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmadd_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfnmadd.vv v8, v12, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfnmadd.vv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmadd_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmadd_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmadd_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmadd_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmadd_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmadd_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmadd_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmadd_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmadd_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmadd_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfnmadd.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmadd.vv v8, v9, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmacc.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmadd_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1bf16_bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmacc.vf v8, fa0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %2,
+ bfloat %1,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-bf.ll
new file mode 100644
index 0000000000000..cfbaafa00c043
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-bf.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfnmsac.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsac_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfnmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmsac_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfnmsac.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmsac_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfnmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmsac_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfnmsac.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmsac_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfnmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmsac_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfnmsac.vv v8, v10, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmsac_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfnmsac.vv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmsac_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfnmsac.vv v8, v12, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmsac_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfnmsac.vv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsac_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfnmsac.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsac_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmsac_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfnmsac.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmsac_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmsac_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfnmsac.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmsac_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmsac_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfnmsac.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmsac_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfnmsac.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmsac_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfnmsac.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmsac_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfnmsac.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmsub.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmsub.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsac_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1bf16_bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmsub.vf v8, fa0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %2,
+ bfloat %1,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-bf.ll
new file mode 100644
index 0000000000000..5ebbb90c4c5a2
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-bf.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfnmsub.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfnmsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmsub_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfnmsub.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfnmsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmsub_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfnmsub.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfnmsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmsub_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfnmsub.vv v8, v10, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfnmsub.vv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmsub_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfnmsub.vv v8, v12, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfnmsub.vv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsub_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfnmsub.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsub_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmsub_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfnmsub.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfnmsub_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmsub_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfnmsub.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfnmsub_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmsub_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfnmsub.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfnmsub_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfnmsub.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmsub_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfnmsub.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfnmsub_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfnmsub.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmsub.vv v8, v9, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmsac.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfnmsub_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1bf16_bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfnmsac.vf v8, fa0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %2,
+ bfloat %1,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrec7-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfrec7-bf.ll
new file mode 100644
index 0000000000000..1211415ffe432
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrec7-bf.ll
@@ -0,0 +1,282 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfrec7.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfrec7_v_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfrec7.v v8, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfrec7.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfrec7.mask.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfrec7_mask_v_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfrec7.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfrec7.mask.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %0,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfrec7.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfrec7_v_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfrec7.v v8, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrec7.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfrec7.mask.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfrec7_mask_v_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfrec7.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrec7.mask.nxv2bf16(
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %0,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfrec7.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfrec7_v_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfrec7.v v8, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrec7.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfrec7.mask.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfrec7_mask_v_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfrec7.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrec7.mask.nxv4bf16(
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %0,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfrec7.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfrec7_v_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfrec7.v v8, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrec7.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfrec7.mask.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfrec7_mask_v_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfrec7.v v8, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrec7.mask.nxv8bf16(
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %0,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfrec7.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfrec7_v_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfrec7.v v8, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrec7.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfrec7.mask.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfrec7_mask_v_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfrec7.v v8, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrec7.mask.nxv16bf16(
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %0,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfrec7.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfrec7_v_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfrec7.v v8, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrec7.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfrec7.mask.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfrec7_mask_v_nxv32bf16_nxv32bf16(<vscale x 32 x i1> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfrec7.v v8, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrec7.mask.nxv32bf16(
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x bfloat> %2,
+ <vscale x 32 x i1> %0,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-bf16.ll
new file mode 100644
index 0000000000000..4626b865ab454
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-bf16.ll
@@ -0,0 +1,264 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfrsqrt7.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfrsqrt7_v_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfrsqrt7.v v8, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfrsqrt7.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfrsqrt7.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %0,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfrsqrt7.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfrsqrt7_v_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfrsqrt7.v v8, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrsqrt7.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfrsqrt7.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv2bf16(
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %0,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfrsqrt7.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfrsqrt7_v_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfrsqrt7.v v8, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrsqrt7.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfrsqrt7.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv4bf16(
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %0,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfrsqrt7.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfrsqrt7_v_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfrsqrt7.v v8, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrsqrt7.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfrsqrt7.v v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv8bf16(
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %0,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfrsqrt7.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfrsqrt7_v_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfrsqrt7.v v8, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrsqrt7.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfrsqrt7.v v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv16bf16(
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %0,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfrsqrt7.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfrsqrt7_v_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfrsqrt7.v v8, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrsqrt7.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv32bf16_nxv32bf16(<vscale x 32 x i1> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfrsqrt7.v v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv32bf16(
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x bfloat> %2,
+ <vscale x 32 x i1> %0,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsub-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsub-bf.ll
new file mode 100644
index 0000000000000..54a6d48cfcc5b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrsub-bf.ll
@@ -0,0 +1,282 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfrsub.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfrsub_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfrsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfrsub.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfrsub.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfrsub_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfrsub.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfrsub.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfrsub_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfrsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrsub.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfrsub.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfrsub_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrsub.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfrsub.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfrsub_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfrsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrsub.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfrsub.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfrsub_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrsub.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfrsub.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfrsub_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfrsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrsub.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfrsub.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfrsub_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfrsub.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrsub.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfrsub.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfrsub_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfrsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrsub.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfrsub.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfrsub_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfrsub.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrsub.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfrsub.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfrsub_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfrsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrsub.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfrsub.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfrsub_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfrsub.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrsub.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-bf.ll
new file mode 100644
index 0000000000000..2cd698d9aaa3c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-bf.ll
@@ -0,0 +1,571 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnj_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnj_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnj_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnj_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v8, v10, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnj_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v8, v12, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnj_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl8re16.v v24, (a0)
+; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v8, v16, v24, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x bfloat> %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnj_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfsgnj.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnj_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfsgnj.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnj_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfsgnj.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnj_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfsgnj.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnj_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfsgnj.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnj_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfsgnj.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-bf.ll
new file mode 100644
index 0000000000000..08340becc9ed4
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-bf.ll
@@ -0,0 +1,571 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
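+;
+; These tests exercise the bfloat vfsgnjn intrinsics (unmasked and masked, .vv and
+; .vf forms, nxv1bf16 through nxv32bf16) and check that they select vfsgnjn with
+; the e16alt element type under Zvfbfa.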
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnjn_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfsgnjn.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfsgnjn.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnjn_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfsgnjn.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfsgnjn.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnjn_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfsgnjn.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfsgnjn.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnjn_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfsgnjn.vv v8, v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfsgnjn.vv v8, v10, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnjn_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfsgnjn.vv v8, v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfsgnjn.vv v8, v12, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnjn_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfsgnjn.vv v8, v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl8re16.v v24, (a0)
+; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu
+; CHECK-NEXT: vfsgnjn.vv v8, v16, v24, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x bfloat> %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnjn_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnjn_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnjn_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnjn_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfsgnjn.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnjn_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfsgnjn.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnjn_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfsgnjn.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-bf.ll
new file mode 100644
index 0000000000000..e51a42e2b8cea
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-bf.ll
@@ -0,0 +1,571 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
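+;
+; These tests exercise the bfloat vfsgnjx intrinsics (unmasked and masked, .vv and
+; .vf forms, nxv1bf16 through nxv32bf16) and check that they select vfsgnjx with
+; the e16alt element type under Zvfbfa.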
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnjx_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfsgnjx.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfsgnjx.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnjx_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfsgnjx.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfsgnjx.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnjx_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfsgnjx.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfsgnjx.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnjx_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfsgnjx.vv v8, v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfsgnjx.vv v8, v10, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnjx_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfsgnjx.vv v8, v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfsgnjx.vv v8, v12, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnjx_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfsgnjx.vv v8, v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl8re16.v v24, (a0)
+; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu
+; CHECK-NEXT: vfsgnjx.vv v8, v16, v24, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x bfloat> %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnjx_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnjx_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnjx_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnjx_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfsgnjx.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnjx_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfsgnjx.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnjx_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfsgnjx.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-bf.ll
new file mode 100644
index 0000000000000..c65719c3a4c1a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-bf.ll
@@ -0,0 +1,288 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
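+;
+; These tests exercise the bfloat vfslide1down.vf intrinsics (unmasked and masked,
+; nxv1bf16 through nxv32bf16) and check that they select vfslide1down.vf with the
+; e16alt element type under Zvfbfa.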
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfslide1down.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfslide1down_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfslide1down.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfslide1down.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfslide1down.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfslide1down.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfslide1down_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfslide1down.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfslide1down.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfslide1down.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfslide1down.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfslide1down_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfslide1down.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfslide1down.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfslide1down.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfslide1down.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfslide1down_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfslide1down.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfslide1down.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfslide1down.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfslide1down.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfslide1down.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfslide1down_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfslide1down.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfslide1down.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfslide1down.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfslide1down.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfslide1down.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfslide1down_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfslide1down.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfslide1down.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfslide1down.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfslide1down.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-bf.ll
new file mode 100644
index 0000000000000..57a48986fdfcd
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-bf.ll
@@ -0,0 +1,294 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfslide1up.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfslide1up_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfslide1up.vf v9, v8, fa0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfslide1up.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfslide1up.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfslide1up.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfslide1up.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfslide1up_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfslide1up.vf v9, v8, fa0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfslide1up.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfslide1up.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfslide1up.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfslide1up.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfslide1up_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfslide1up.vf v9, v8, fa0
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfslide1up.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfslide1up.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfslide1up.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfslide1up.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfslide1up_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfslide1up.vf v10, v8, fa0
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfslide1up.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfslide1up.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfslide1up.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfslide1up.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfslide1up.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfslide1up_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfslide1up.vf v12, v8, fa0
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfslide1up.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfslide1up.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfslide1up.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfslide1up.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfslide1up.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfslide1up_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfslide1up.vf v16, v8, fa0
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfslide1up.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfslide1up.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfslide1up.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfslide1up.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-bf.ll
new file mode 100644
index 0000000000000..aea75211b70b5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-bf.ll
@@ -0,0 +1,559 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsub.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsub_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsub.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsub.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsub.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsub.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsub_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsub.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsub.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsub.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsub.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsub_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsub.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsub.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsub.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsub.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsub_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfsub.vv v8, v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsub.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsub.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfsub.vv v8, v10, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsub.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsub.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsub_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfsub.vv v8, v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsub.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsub.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfsub.vv v8, v12, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsub.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsub.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsub_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v8, v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsub.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsub.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsub_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl8re16.v v24, (a0)
+; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu
+; CHECK-NEXT: vfsub.vv v8, v16, v24, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsub.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x bfloat> %2,
+ <vscale x 32 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsub.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsub_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsub.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfsub.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfsub_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsub.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsub.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsub_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsub.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfsub.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfsub_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsub.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsub.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsub_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsub.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfsub.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfsub_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsub.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsub.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsub_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsub.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfsub.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfsub_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfsub.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsub.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsub.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsub_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsub.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfsub.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfsub_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfsub.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsub.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsub.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsub_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfsub.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsub.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen 7, iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfsub.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfsub_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfsub.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsub.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd-bf.ll
new file mode 100644
index 0000000000000..62feac824efad
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd-bf.ll
@@ -0,0 +1,519 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwadd_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwadd.vv v10, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwadd_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwadd_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwadd.vv v10, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwadd_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwadd_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vv v8, v11, v10
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwadd_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwadd.vv v8, v10, v11, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwadd_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vv v8, v14, v12
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwadd_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwadd.vv v8, v12, v14, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwadd_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vv v8, v20, v16
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwadd_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwadd.vv v8, v16, v20, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwadd_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_vf_nxv1f32_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwadd.vf v9, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwadd_mask_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv1f32_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwadd.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwadd_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_vf_nxv2f32_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwadd.vf v9, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwadd_mask_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv2f32_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwadd.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwadd_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f32_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vf v8, v10, fa0
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwadd_mask_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv4f32_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwadd.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwadd_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f32_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vf v8, v12, fa0
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwadd_mask_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv8f32_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwadd.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwadd_vf_nxv16f32_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_vf_nxv16f32_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vf v8, v16, fa0
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwadd_mask_vf_nxv16f32_nxv16bf16_bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv16f32_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwadd.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-w-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd-w-bf.ll
new file mode 100644
index 0000000000000..c5417e826bf41
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd-w-bf.ll
@@ -0,0 +1,773 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwadd.wv v8, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wv_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv1f32_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwadd.wv v8, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wv_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv2f32_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfwadd.wv v8, v8, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wv_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv4f32_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfwadd.wv v8, v8, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wv_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv8f32_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfwadd.wv v8, v8, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> poison,
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wv_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv16f32_nxv16f32_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl4re16.v v24, (a0)
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vsetvli zero, a1, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v16, v24, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x float> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x float>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x float> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x float>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x float>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x float> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x float>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v10, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x float>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x float> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x float>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v12, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x float>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.bf16(
+ <vscale x 16 x float> poison,
+ <vscale x 16 x float> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x float>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v16, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
+define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v8, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v8, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v8, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
+define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %0,
+ bfloat %1,
+ <vscale x 1 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %0,
+ bfloat %1,
+ <vscale x 2 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %0,
+ bfloat %1,
+ <vscale x 4 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %0,
+ bfloat %1,
+ <vscale x 8 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %0,
+ bfloat %1,
+ <vscale x 16 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
+define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x float> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwadd.wv v10, v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x float> %1,
+ <vscale x 1 x bfloat> %0,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwadd.wv v10, v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x float> %1,
+ <vscale x 2 x bfloat> %0,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.wv v8, v10, v12
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x float> %1,
+ <vscale x 4 x bfloat> %0,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.wv v8, v12, v16
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x float> %1,
+ <vscale x 8 x bfloat> %0,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-x.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-x.ll
new file mode 100644
index 0000000000000..b7df45bad36e6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-x.ll
@@ -0,0 +1,264 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv1bf16.nxv1i8(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i8>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv1bf16_nxv1i8(<vscale x 1 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv1bf16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfwcvt.f.x.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv1bf16.nxv1i8(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv1bf16.nxv1i8(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i8>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv1bf16_nxv1i8(<vscale x 1 x bfloat> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv1bf16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfwcvt.f.x.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv1bf16.nxv1i8(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x i8> %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv2bf16.nxv2i8(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i8>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv2bf16_nxv2i8(<vscale x 2 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv2bf16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfwcvt.f.x.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv2bf16.nxv2i8(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv2bf16.nxv2i8(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i8>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv2bf16_nxv2i8(<vscale x 2 x bfloat> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv2bf16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfwcvt.f.x.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv2bf16.nxv2i8(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x i8> %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv4bf16.nxv4i8(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i8>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv4bf16_nxv4i8(<vscale x 4 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv4bf16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.f.x.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv4bf16.nxv4i8(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv4bf16.nxv4i8(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv4bf16_nxv4i8(<vscale x 4 x bfloat> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv4bf16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu
+; CHECK-NEXT: vfwcvt.f.x.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv4bf16.nxv4i8(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x i8> %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv8bf16.nxv8i8(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i8>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv8bf16_nxv8i8(<vscale x 8 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv8bf16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv8bf16.nxv8i8(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv8bf16.nxv8i8(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv8bf16_nxv8i8(<vscale x 8 x bfloat> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv8bf16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu
+; CHECK-NEXT: vfwcvt.f.x.v v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv8bf16.nxv8i8(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x i8> %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv16bf16.nxv16i8(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i8>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv16bf16_nxv16i8(<vscale x 16 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv16bf16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv16bf16.nxv16i8(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv16bf16.nxv16i8(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv16bf16_nxv16i8(<vscale x 16 x bfloat> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv16bf16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu
+; CHECK-NEXT: vfwcvt.f.x.v v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv16bf16.nxv16i8(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x i8> %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv32bf16.nxv32i8(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i8>,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv32bf16_nxv32i8(<vscale x 32 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv32bf16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv32bf16.nxv32i8(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv32bf16.nxv32i8(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv32bf16_nxv32i8(<vscale x 32 x bfloat> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv32bf16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu
+; CHECK-NEXT: vfwcvt.f.x.v v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv32bf16.nxv32i8(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x i8> %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-xu.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-xu.ll
new file mode 100644
index 0000000000000..c370261a77bc0
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-xu.ll
@@ -0,0 +1,264 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv1bf16.nxv1i8(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i8>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv1bf16_nxv1i8(<vscale x 1 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv1bf16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv1bf16.nxv1i8(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1bf16.nxv1i8(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i8>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv1bf16_nxv1i8(<vscale x 1 x bfloat> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv1bf16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1bf16.nxv1i8(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x i8> %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv2bf16.nxv2i8(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i8>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv2bf16_nxv2i8(<vscale x 2 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv2bf16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv2bf16.nxv2i8(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2bf16.nxv2i8(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i8>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv2bf16_nxv2i8(<vscale x 2 x bfloat> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv2bf16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2bf16.nxv2i8(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x i8> %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv4bf16.nxv4i8(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i8>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv4bf16_nxv4i8(<vscale x 4 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv4bf16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv4bf16.nxv4i8(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4bf16.nxv4i8(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv4bf16_nxv4i8(<vscale x 4 x bfloat> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv4bf16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4bf16.nxv4i8(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x i8> %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv8bf16.nxv8i8(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i8>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv8bf16_nxv8i8(<vscale x 8 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv8bf16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv8bf16.nxv8i8(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8bf16.nxv8i8(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv8bf16_nxv8i8(<vscale x 8 x bfloat> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv8bf16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8bf16.nxv8i8(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x i8> %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv16bf16.nxv16i8(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i8>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv16bf16_nxv16i8(<vscale x 16 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv16bf16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv16bf16.nxv16i8(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv16bf16.nxv16i8(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv16bf16_nxv16i8(<vscale x 16 x bfloat> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv16bf16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv16bf16.nxv16i8(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x i8> %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv32bf16.nxv32i8(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i8>,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv32bf16_nxv32i8(<vscale x 32 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv32bf16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv32bf16.nxv32i8(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x i8> %0,
+ iXLen %1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv32bf16.nxv32i8(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv32bf16_nxv32i8(<vscale x 32 x bfloat> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv32bf16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv32bf16.nxv32i8(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x i8> %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-bf.ll
new file mode 100644
index 0000000000000..a3f667818ab0a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-bf.ll
@@ -0,0 +1,506 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmsac_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfwmsac.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmsac_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfwmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmsac_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfwmsac.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmsac_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfwmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmsac_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfwmsac.vv v8, v10, v11
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmsac_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfwmsac.vv v8, v10, v11, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmsac_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfwmsac.vv v8, v12, v14
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmsac_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfwmsac.vv v8, v12, v14, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmsac_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfwmsac.vv v8, v16, v20
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmsac_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfwmsac.vv v8, v16, v20, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.bf16(
+ <vscale x 1 x float>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmsac_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv1f32_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.bf16(
+ <vscale x 1 x float> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.bf16(
+ <vscale x 1 x float>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmsac_mask_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv1f32_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.bf16(
+ <vscale x 1 x float> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.bf16(
+ <vscale x 2 x float>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmsac_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv2f32_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.bf16(
+ <vscale x 2 x float> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.bf16(
+ <vscale x 2 x float>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmsac_mask_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv2f32_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.bf16(
+ <vscale x 2 x float> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.bf16(
+ <vscale x 4 x float>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmsac_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv4f32_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.bf16(
+ <vscale x 4 x float> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.bf16(
+ <vscale x 4 x float>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmsac_mask_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv4f32_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.bf16(
+ <vscale x 4 x float> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.bf16(
+ <vscale x 8 x float>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmsac_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv8f32_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.bf16(
+ <vscale x 8 x float> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.bf16(
+ <vscale x 8 x float>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmsac_mask_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv8f32_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.bf16(
+ <vscale x 8 x float> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.bf16(
+ <vscale x 16 x float>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmsac_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv16f32_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.bf16(
+ <vscale x 16 x float> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.bf16(
+ <vscale x 16 x float>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmsac_mask_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv16f32_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.bf16(
+ <vscale x 16 x float> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmul-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmul-bf.ll
new file mode 100644
index 0000000000000..577b93af7a918
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmul-bf.ll
@@ -0,0 +1,519 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmul_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwmul.vv v10, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmul_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwmul.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmul_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwmul.vv v10, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmul_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwmul.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmul_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vv v8, v11, v10
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmul_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwmul.vv v8, v10, v11, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmul_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vv v8, v14, v12
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmul_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwmul.vv v8, v12, v14, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmul_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vv v8, v20, v16
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmul_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwmul.vv v8, v16, v20, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmul_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vf_nxv1f32_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwmul.vf v9, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmul_mask_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv1f32_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwmul.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmul_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vf_nxv2f32_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwmul.vf v9, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmul_mask_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv2f32_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwmul.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmul_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f32_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vf v8, v10, fa0
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmul_mask_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv4f32_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwmul.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmul_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f32_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vf v8, v12, fa0
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmul_mask_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv8f32_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwmul.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmul_vf_nxv16f32_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vf_nxv16f32_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vf v8, v16, fa0
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmul_mask_vf_nxv16f32_nxv16bf16_bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv16f32_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwmul.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-bf.ll
new file mode 100644
index 0000000000000..1e05e4c7acf25
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-bf.ll
@@ -0,0 +1,506 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmacc_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfwnmacc.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmacc_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfwnmacc.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmacc_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfwnmacc.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmacc_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfwnmacc.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmacc_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfwnmacc.vv v8, v10, v11
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmacc_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfwnmacc.vv v8, v10, v11, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmacc_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfwnmacc.vv v8, v12, v14
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmacc_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfwnmacc.vv v8, v12, v14, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmacc_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfwnmacc.vv v8, v16, v20
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmacc_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfwnmacc.vv v8, v16, v20, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.bf16(
+ <vscale x 1 x float>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmacc_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv1f32_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.bf16(
+ <vscale x 1 x float> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.bf16(
+ <vscale x 1 x float>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmacc_mask_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv1f32_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.bf16(
+ <vscale x 1 x float> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.bf16(
+ <vscale x 2 x float>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmacc_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv2f32_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.bf16(
+ <vscale x 2 x float> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.bf16(
+ <vscale x 2 x float>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmacc_mask_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv2f32_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.bf16(
+ <vscale x 2 x float> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.bf16(
+ <vscale x 4 x float>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmacc_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv4f32_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfwnmacc.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.bf16(
+ <vscale x 4 x float> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.bf16(
+ <vscale x 4 x float>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmacc_mask_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv4f32_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfwnmacc.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.bf16(
+ <vscale x 4 x float> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.bf16(
+ <vscale x 8 x float>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmacc_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv8f32_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfwnmacc.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.bf16(
+ <vscale x 8 x float> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.bf16(
+ <vscale x 8 x float>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmacc_mask_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv8f32_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfwnmacc.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.bf16(
+ <vscale x 8 x float> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.bf16(
+ <vscale x 16 x float>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmacc_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv16f32_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfwnmacc.vf v8, fa0, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.bf16(
+ <vscale x 16 x float> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.bf16(
+ <vscale x 16 x float>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmacc_mask_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv16f32_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfwnmacc.vf v8, fa0, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.bf16(
+ <vscale x 16 x float> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-bf.ll
new file mode 100644
index 0000000000000..223ad4f7483f6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-bf.ll
@@ -0,0 +1,506 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmsac_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfwnmsac.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmsac_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfwnmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmsac_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfwnmsac.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmsac_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfwnmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmsac_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfwnmsac.vv v8, v10, v11
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmsac_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfwnmsac.vv v8, v10, v11, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmsac_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfwnmsac.vv v8, v12, v14
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmsac_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfwnmsac.vv v8, v12, v14, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmsac_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfwnmsac.vv v8, v16, v20
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmsac_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfwnmsac.vv v8, v16, v20, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.bf16(
+ <vscale x 1 x float>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmsac_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv1f32_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.bf16(
+ <vscale x 1 x float> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.bf16(
+ <vscale x 1 x float>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmsac_mask_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv1f32_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.bf16(
+ <vscale x 1 x float> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.bf16(
+ <vscale x 2 x float>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmsac_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv2f32_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.bf16(
+ <vscale x 2 x float> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.bf16(
+ <vscale x 2 x float>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmsac_mask_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv2f32_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.bf16(
+ <vscale x 2 x float> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.bf16(
+ <vscale x 4 x float>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmsac_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv4f32_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfwnmsac.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.bf16(
+ <vscale x 4 x float> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.bf16(
+ <vscale x 4 x float>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmsac_mask_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv4f32_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfwnmsac.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.bf16(
+ <vscale x 4 x float> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.bf16(
+ <vscale x 8 x float>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmsac_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv8f32_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfwnmsac.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.bf16(
+ <vscale x 8 x float> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.bf16(
+ <vscale x 8 x float>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmsac_mask_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv8f32_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfwnmsac.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.bf16(
+ <vscale x 8 x float> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.bf16(
+ <vscale x 16 x float>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmsac_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv16f32_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfwnmsac.vf v8, fa0, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.bf16(
+ <vscale x 16 x float> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.bf16(
+ <vscale x 16 x float>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmsac_mask_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv16f32_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfwnmsac.vf v8, fa0, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.bf16(
+ <vscale x 16 x float> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub-bf.ll
new file mode 100644
index 0000000000000..d993e4e610d2c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub-bf.ll
@@ -0,0 +1,519 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwsub_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwsub.vv v10, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwsub_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1bf16.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwsub_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwsub.vv v10, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwsub_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2bf16.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwsub_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vv v8, v11, v10
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwsub_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwsub.vv v8, v10, v11, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4bf16.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwsub_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vv v8, v14, v12
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwsub_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwsub.vv v8, v12, v14, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8bf16.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwsub_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vv v8, v20, v16
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwsub_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwsub.vv v8, v16, v20, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16bf16.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwsub_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vf_nxv1f32_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwsub.vf v9, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwsub_mask_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv1f32_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwsub.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1bf16.bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwsub_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vf_nxv2f32_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwsub.vf v9, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwsub_mask_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv2f32_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwsub.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2bf16.bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwsub_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f32_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vf v8, v10, fa0
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwsub_mask_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv4f32_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwsub.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4bf16.bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwsub_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f32_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vf v8, v12, fa0
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwsub_mask_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv8f32_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwsub.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8bf16.bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwsub_vf_nxv16f32_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vf_nxv16f32_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vf v8, v16, fa0
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwsub_mask_vf_nxv16f32_nxv16bf16_bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv16f32_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwsub.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16bf16.bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-w-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub-w-bf.ll
new file mode 100644
index 0000000000000..b22899a100e4a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub-w-bf.ll
@@ -0,0 +1,773 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwsub.wv v8, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wv_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv1f32_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwsub.wv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwsub.wv v8, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wv_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv2f32_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwsub.wv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfwsub.wv v8, v8, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wv_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv4f32_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwsub.wv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfwsub.wv v8, v8, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wv_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv8f32_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwsub.wv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfwsub.wv v8, v8, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> poison,
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wv_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv16f32_nxv16f32_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl4re16.v v24, (a0)
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vsetvli zero, a1, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwsub.wv v8, v16, v24, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwsub.wf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x float> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwsub.wf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x float>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwsub.wf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x float> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x float>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwsub.wf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x float>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfwsub.wf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x float> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x float>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwsub.wf v8, v10, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x float>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfwsub.wf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x float> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x float>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwsub.wf v8, v12, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x float>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfwsub.wf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.bf16(
+ <vscale x 16 x float> poison,
+ <vscale x 16 x float> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x float>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwsub.wf v8, v16, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
+define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwsub.wv v8, v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwsub.wv v8, v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwsub.wv v8, v8, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwsub.wv v8, v8, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwsub.wv v8, v8, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
+define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %0,
+ bfloat %1,
+ <vscale x 1 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %0,
+ bfloat %1,
+ <vscale x 2 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %0,
+ bfloat %1,
+ <vscale x 4 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %0,
+ bfloat %1,
+ <vscale x 8 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %0,
+ bfloat %1,
+ <vscale x 16 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
+
+define <vscale x 1 x float> @intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x float> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwsub.wv v10, v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> poison,
+ <vscale x 1 x float> %1,
+ <vscale x 1 x bfloat> %0,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwsub.wv v10, v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> poison,
+ <vscale x 2 x float> %1,
+ <vscale x 2 x bfloat> %0,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.wv v8, v10, v12
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> poison,
+ <vscale x 4 x float> %1,
+ <vscale x 4 x bfloat> %0,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.wv v8, v12, v16
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> poison,
+ <vscale x 8 x float> %1,
+ <vscale x 8 x bfloat> %0,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x float> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfeq-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq-bf.ll
new file mode 100644
index 0000000000000..9bd859b3452f2
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfeq-bf.ll
@@ -0,0 +1,496 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfeq_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfeq_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfeq.vv v0, v8, v9
+; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %3,
+ <vscale x 1 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfeq_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfeq.vv v0, v8, v9
+; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2bf16(
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x bfloat> %3,
+ <vscale x 2 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfeq_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfeq.vv v0, v8, v9
+; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv.v.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4bf16(
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x bfloat> %3,
+ <vscale x 4 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfeq_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v14, v0
+; CHECK-NEXT: vmfeq.vv v0, v8, v10
+; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8bf16(
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x bfloat> %3,
+ <vscale x 8 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfeq_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfeq_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v20, v0
+; CHECK-NEXT: vmfeq.vv v0, v8, v12
+; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16bf16(
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x bfloat> %3,
+ <vscale x 16 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfeq_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfeq_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfeq_vf_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfeq_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfeq_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmfeq.vf v11, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfeq_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfeq_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v13, v0
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vmfeq.vf v13, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v13
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfge-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge-bf.ll
new file mode 100644
index 0000000000000..73946dc1a744c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfge-bf.ll
@@ -0,0 +1,496 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfge_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v9, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfge_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfle.vv v0, v9, v8
+; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %3,
+ <vscale x 1 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfge_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v9, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfge_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfle.vv v0, v9, v8
+; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2bf16(
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x bfloat> %3,
+ <vscale x 2 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfge_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v9, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfge_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfle.vv v0, v9, v8
+; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t
+; CHECK-NEXT: vmv.v.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4bf16(
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x bfloat> %3,
+ <vscale x 4 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfge_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v10, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfge_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v14, v0
+; CHECK-NEXT: vmfle.vv v0, v10, v8
+; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8bf16(
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x bfloat> %3,
+ <vscale x 8 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfge_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v12, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfge_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v20, v0
+; CHECK-NEXT: vmfle.vv v0, v12, v8
+; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16bf16(
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x bfloat> %3,
+ <vscale x 16 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfge_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfge_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfge_vf_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfge_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfge_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfge_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfge_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfge_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfge_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfge_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v13, v0
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v13
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt-bf.ll
new file mode 100644
index 0000000000000..fac324ca5c125
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt-bf.ll
@@ -0,0 +1,496 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfgt_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v9, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfgt_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmflt.vv v0, v9, v8
+; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %3,
+ <vscale x 1 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfgt_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v9, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmflt.vv v0, v9, v8
+; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2bf16(
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x bfloat> %3,
+ <vscale x 2 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfgt_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v9, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmflt.vv v0, v9, v8
+; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t
+; CHECK-NEXT: vmv.v.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4bf16(
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x bfloat> %3,
+ <vscale x 4 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfgt_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v10, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v14, v0
+; CHECK-NEXT: vmflt.vv v0, v10, v8
+; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8bf16(
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x bfloat> %3,
+ <vscale x 8 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfgt_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v12, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfgt_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v20, v0
+; CHECK-NEXT: vmflt.vv v0, v12, v8
+; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16bf16(
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x bfloat> %3,
+ <vscale x 16 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfgt_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfgt_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfgt_vf_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfgt_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfgt_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfgt_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfgt_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v13, v0
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v13
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfle-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle-bf.ll
new file mode 100644
index 0000000000000..8356b7bbd3ff7
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfle-bf.ll
@@ -0,0 +1,496 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfle_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfle_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfle.vv v0, v8, v9
+; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %3,
+ <vscale x 1 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfle_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfle_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfle.vv v0, v8, v9
+; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2bf16(
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x bfloat> %3,
+ <vscale x 2 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfle_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfle_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfle.vv v0, v8, v9
+; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv.v.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4bf16(
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x bfloat> %3,
+ <vscale x 4 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfle_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfle_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v14, v0
+; CHECK-NEXT: vmfle.vv v0, v8, v10
+; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8bf16(
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x bfloat> %3,
+ <vscale x 8 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfle_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfle_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v20, v0
+; CHECK-NEXT: vmfle.vv v0, v8, v12
+; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16bf16(
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x bfloat> %3,
+ <vscale x 16 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfle_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfle_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfle_vf_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfle_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfle_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfle_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfle_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfle_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfle_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfle_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v13, v0
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v13
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmflt-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt-bf.ll
new file mode 100644
index 0000000000000..2e1bcc5e87bfc
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vmflt-bf.ll
@@ -0,0 +1,496 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmflt_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmflt_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmflt.vv v0, v8, v9
+; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %3,
+ <vscale x 1 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmflt_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmflt_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmflt.vv v0, v8, v9
+; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2bf16(
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x bfloat> %3,
+ <vscale x 2 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmflt_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmflt_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmflt.vv v0, v8, v9
+; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv.v.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4bf16(
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x bfloat> %3,
+ <vscale x 4 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmflt_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmflt_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v14, v0
+; CHECK-NEXT: vmflt.vv v0, v8, v10
+; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8bf16(
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x bfloat> %3,
+ <vscale x 8 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmflt_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmflt_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v20, v0
+; CHECK-NEXT: vmflt.vv v0, v8, v12
+; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16bf16(
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x bfloat> %3,
+ <vscale x 16 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmflt_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmflt_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmflt_vf_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmflt_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmflt_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmflt_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmflt_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmflt_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmflt_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmflt_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v13, v0
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v13
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfne-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne-bf.ll
new file mode 100644
index 0000000000000..283ffc500fdde
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfne-bf.ll
@@ -0,0 +1,496 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfne_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmfne.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfne_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfne.vv v0, v8, v9
+; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %3,
+ <vscale x 1 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfne_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmfne.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfne_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfne.vv v0, v8, v9
+; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2bf16(
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x bfloat> %3,
+ <vscale x 2 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfne_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmfne.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfne_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfne.vv v0, v8, v9
+; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv.v.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4bf16(
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x bfloat> %3,
+ <vscale x 4 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfne_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmfne.vv v0, v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfne_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v14, v0
+; CHECK-NEXT: vmfne.vv v0, v8, v10
+; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8bf16(
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x bfloat> %3,
+ <vscale x 8 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfne_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v0, v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfne_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v20, v0
+; CHECK-NEXT: vmfne.vv v0, v8, v12
+; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16bf16(
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x bfloat> %3,
+ <vscale x 16 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfne_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfne_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1bf16.bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfne_vf_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfne_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2bf16.bf16(
+ <vscale x 2 x i1> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfne_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vmfne_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4bf16.bf16(
+ <vscale x 4 x i1> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfne_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfne_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmfne.vf v11, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfne_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfne_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v13, v0
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vmfne.vf v13, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v13
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+