[llvm] 2b6df4a - [RISCV] Add codegen support for bf16 vector
Jun Sha via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 27 19:31:34 PDT 2023
Author: Jun Sha (Joshua)
Date: 2023-07-28T09:54:23+08:00
New Revision: 2b6df4a336cc6bd814e46a856a3868ef8267afab
URL: https://github.com/llvm/llvm-project/commit/2b6df4a336cc6bd814e46a856a3868ef8267afab
DIFF: https://github.com/llvm/llvm-project/commit/2b6df4a336cc6bd814e46a856a3868ef8267afab.diff
LOG: [RISCV] Add codegen support for bf16 vector
This patch adds codegen support for vectors with the bfloat16 type in the LLVM backend.
With this patch, Zvfbfmin/Zvfbfwma instructions, as well as vle16/vse16, can be generated from the newly added bf16 IR intrinsics.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D156287
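For context, here is a minimal standalone sketch (not part of the patch) of how one of the
new intrinsics is used at the IR level. It mirrors the vfwmaccbf16.ll test added below, with
the tests' iXLen placeholder instantiated as i64 for RV64; the function name is illustrative
only. The three trailing integer operands are the rounding mode (7 = dynamic frm), the AVL,
and the tail/mask policy (0 = tail undisturbed). Compiled with
llc -mtriple=riscv64 -mattr=+v,+experimental-zfbfmin,+experimental-zvfbfwma, the call should
lower to vfwmaccbf16.vv.

declare <vscale x 1 x float> @llvm.riscv.vfwmaccbf16.nxv1f32.nxv1bf16(
  <vscale x 1 x float>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>,
  i64, i64, i64)

define <vscale x 1 x float> @widening_bf16_fma(<vscale x 1 x float> %acc,
                                               <vscale x 1 x bfloat> %x,
                                               <vscale x 1 x bfloat> %y,
                                               i64 %vl) {
entry:
  ; acc += widen(x) * widen(y), elementwise; frm = 7 (dynamic) and
  ; policy = 0 (tail undisturbed), matching the tests below.
  %r = call <vscale x 1 x float> @llvm.riscv.vfwmaccbf16.nxv1f32.nxv1bf16(
      <vscale x 1 x float> %acc, <vscale x 1 x bfloat> %x,
      <vscale x 1 x bfloat> %y, i64 7, i64 %vl, i64 0)
  ret <vscale x 1 x float> %r
}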
Added:
llvm/test/CodeGen/RISCV/rvv/vfncvtbf16-f-f.ll
llvm/test/CodeGen/RISCV/rvv/vfwcvtbf16-f-f.ll
llvm/test/CodeGen/RISCV/rvv/vfwmaccbf16.ll
Modified:
llvm/include/llvm/IR/IntrinsicsRISCV.td
llvm/lib/Target/RISCV/RISCVFeatures.td
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVInstrInfo.td
llvm/lib/Target/RISCV/RISCVInstrInfoV.td
llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
llvm/lib/Target/RISCV/RISCVRegisterInfo.td
llvm/lib/Target/RISCV/RISCVSubtarget.h
llvm/test/CodeGen/RISCV/rvv/vle.ll
llvm/test/CodeGen/RISCV/rvv/vse.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index e3476b160db68b..4d7bfbd5669d12 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1536,6 +1536,7 @@ let TargetPrefix = "riscv" in {
defm vfnmsub : RISCVTernaryAAXARoundingMode;
defm vfwmacc : RISCVTernaryWideRoundingMode;
+ defm vfwmaccbf16 : RISCVTernaryWideRoundingMode;
defm vfwnmacc : RISCVTernaryWideRoundingMode;
defm vfwmsac : RISCVTernaryWideRoundingMode;
defm vfwnmsac : RISCVTernaryWideRoundingMode;
@@ -1640,6 +1641,7 @@ let TargetPrefix = "riscv" in {
defm vfwcvt_rtz_xu_f_v : RISCVConversion;
defm vfwcvt_rtz_x_f_v : RISCVConversion;
defm vfwcvt_f_f_v : RISCVConversion;
+ defm vfwcvtbf16_f_f_v : RISCVConversion;
defm vfncvt_f_xu_w : RISCVConversionRoundingMode;
defm vfncvt_f_x_w : RISCVConversionRoundingMode;
@@ -1648,6 +1650,7 @@ let TargetPrefix = "riscv" in {
defm vfncvt_rtz_xu_f_w : RISCVConversion;
defm vfncvt_rtz_x_f_w : RISCVConversion;
defm vfncvt_f_f_w : RISCVConversionRoundingMode;
+ defm vfncvtbf16_f_f_w : RISCVConversionRoundingMode;
defm vfncvt_rod_f_f_w : RISCVConversion;
// Output: (vector)
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 8cbeb3f62f6d4b..f10f97dc5ca6e8 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -477,6 +477,8 @@ def HasStdExtZvfbfwma : Predicate<"Subtarget->hasStdExtZvfbfwma()">,
AssemblerPredicate<(all_of FeatureStdExtZvfbfwma),
"'Zvfbfwma' (Vector BF16 widening mul-add)">;
+def HasVInstructionsBF16 : Predicate<"Subtarget->hasVInstructionsBF16()">;
+
def FeatureStdExtZvfh
: SubtargetFeature<"zvfh", "HasStdExtZvfh", "true",
"'Zvfh' (Vector Half-Precision Floating-Point)",
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index abfd56b8400952..6dfe3704678479 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -145,6 +145,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
static const MVT::SimpleValueType F16VecVTs[] = {
MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
+ static const MVT::SimpleValueType BF16VecVTs[] = {
+ MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
+ MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
static const MVT::SimpleValueType F32VecVTs[] = {
MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
static const MVT::SimpleValueType F64VecVTs[] = {
@@ -187,6 +190,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
for (MVT VT : F16VecVTs)
addRegClassForRVV(VT);
+ if (Subtarget.hasVInstructionsBF16())
+ for (MVT VT : BF16VecVTs)
+ addRegClassForRVV(VT);
+
if (Subtarget.hasVInstructionsF32())
for (MVT VT : F32VecVTs)
addRegClassForRVV(VT);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index e58e3412aea350..382e5c7b3d9723 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1951,7 +1951,6 @@ include "RISCVInstrInfoZk.td"
// Vector
include "RISCVInstrInfoV.td"
-include "RISCVInstrInfoZvfbf.td"
include "RISCVInstrInfoZvk.td"
// Integer
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index dc1a028e679431..9d22d5be829656 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -1803,4 +1803,5 @@ let Predicates = [HasVInstructionsI64, IsRV64] in {
}
} // Predicates = [HasVInstructionsI64, IsRV64]
+include "RISCVInstrInfoZvfbf.td"
include "RISCVInstrInfoVPseudos.td"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index c3e454cd3786f7..5f7672fd63d8a5 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -118,12 +118,15 @@ class PseudoToVInst<string PseudoInst> {
["_F64", "_F"],
["_F32", "_F"],
["_F16", "_F"],
+ ["_BF16", "_F"],
["_VF64", "_VF"],
["_VF32", "_VF"],
["_VF16", "_VF"],
+ ["_VBF16", "_VF"],
["_WF64", "_WF"],
["_WF32", "_WF"],
["_WF16", "_WF"],
+ ["_WBF16", "_WF"],
["_TU", ""],
["_TIED", ""],
["_MASK", ""],
@@ -210,15 +213,28 @@ class FPR_Info<int sew> {
list<LMULInfo> MxListFW = !if(!eq(sew, 64), [], !listremove(MxList, [V_M8]));
}
+class BFPR_Info<int sew> {
+ RegisterClass fprclass = !cast<RegisterClass>("FPR" # sew);
+ string FX = "BF" # sew;
+ int SEW = sew;
+ list<LMULInfo> MxList = MxSet<sew>.m;
+ list<LMULInfo> MxListFW = !if(!eq(sew, 64), [], !listremove(MxList, [V_M8]));
+}
+
def SCALAR_F16 : FPR_Info<16>;
def SCALAR_F32 : FPR_Info<32>;
def SCALAR_F64 : FPR_Info<64>;
+def SCALAR_BF16 : BFPR_Info<16>;
+
defvar FPList = [SCALAR_F16, SCALAR_F32, SCALAR_F64];
// Used for widening instructions. It excludes F64.
defvar FPListW = [SCALAR_F16, SCALAR_F32];
+// Used for widening bf16 instructions.
+defvar BFPListW = [SCALAR_BF16];
+
class NFSet<LMULInfo m> {
list<int> L = !cond(!eq(m.value, V_M8.value): [],
!eq(m.value, V_M4.value): [2],
@@ -273,6 +289,7 @@ class VTypeInfo<ValueType Vec, ValueType Mas, int Sew, VReg Reg, LMULInfo M,
string ScalarSuffix = !cond(!eq(Scal, XLenVT) : "X",
!eq(Scal, f16) : "F16",
+ !eq(Scal, bf16) : "BF16",
!eq(Scal, f32) : "F32",
!eq(Scal, f64) : "F64");
}
@@ -356,6 +373,25 @@ defset list<VTypeInfo> AllVectors = {
}
}
+defset list<VTypeInfo> AllBFloatVectors = {
+ defset list<VTypeInfo> NoGroupBFloatVectors = {
+ defset list<VTypeInfo> FractionalGroupBFloatVectors = {
+ def VBF16MF4: VTypeInfo<vbfloat16mf4_t, vbool64_t, 16, VR, V_MF4, bf16, FPR16>;
+ def VBF16MF2: VTypeInfo<vbfloat16mf2_t, vbool32_t, 16, VR, V_MF2, bf16, FPR16>;
+ }
+ def VBF16M1: VTypeInfo<vbfloat16m1_t, vbool16_t, 16, VR, V_M1, bf16, FPR16>;
+ }
+
+ defset list<GroupVTypeInfo> GroupBFloatVectors = {
+ def VBF16M2: GroupVTypeInfo<vbfloat16m2_t, vbfloat16m1_t, vbool8_t, 16,
+ VRM2, V_M2, bf16, FPR16>;
+ def VBF16M4: GroupVTypeInfo<vbfloat16m4_t, vbfloat16m1_t, vbool4_t, 16,
+ VRM4, V_M4, bf16, FPR16>;
+ def VBF16M8: GroupVTypeInfo<vbfloat16m8_t, vbfloat16m1_t, vbool2_t, 16,
+ VRM8, V_M8, bf16, FPR16>;
+ }
+}
+
// This functor is used to obtain the int vector type that has the same SEW and
// multiplier as the input parameter type
class GetIntVTypeInfo<VTypeInfo vti> {
@@ -490,6 +526,14 @@ defset list<VTypeInfoToWide> AllWidenableIntToFloatVectors = {
def : VTypeInfoToWide<VI32M2, VF64M4>;
def : VTypeInfoToWide<VI32M4, VF64M8>;
}
+
+defset list<VTypeInfoToWide> AllWidenableBFloatToFloatVectors = {
+ def : VTypeInfoToWide<VBF16MF4, VF32MF2>;
+ def : VTypeInfoToWide<VBF16MF2, VF32M1>;
+ def : VTypeInfoToWide<VBF16M1, VF32M2>;
+ def : VTypeInfoToWide<VBF16M2, VF32M4>;
+ def : VTypeInfoToWide<VBF16M4, VF32M8>;
+}
// This class holds the record of the RISCVVPseudoTable below.
// This represents the information we need in codegen for each pseudo.
@@ -723,6 +767,7 @@ class VPseudo<Instruction instr, LMULInfo m, dag outs, dag ins, int sew = 0> :
class GetVTypePredicates<VTypeInfo vti> {
list<Predicate> Predicates = !cond(!eq(vti.Scalar, f16) : [HasVInstructionsF16],
+ !eq(vti.Scalar, bf16) : [HasVInstructionsBF16],
!eq(vti.Scalar, f32) : [HasVInstructionsAnyF],
!eq(vti.Scalar, f64) : [HasVInstructionsF64],
!eq(vti.SEW, 64) : [HasVInstructionsI64],
@@ -3047,6 +3092,12 @@ multiclass VPseudoTernaryW_VF_RM<LMULInfo m, FPR_Info f> {
m.vrclass, m, constraint>;
}
+multiclass VPseudoTernaryW_VF_BF_RM<LMULInfo m, BFPR_Info f> {
+ defvar constraint = "@earlyclobber $rd";
+ defm "_V" # f.FX : VPseudoTernaryWithPolicyRoundingMode<m.wvrclass, f.fprclass,
+ m.vrclass, m, constraint>;
+}
+
multiclass VPseudoVSLDVWithPolicy<VReg RetClass,
RegisterClass Op1Class,
DAGOperand Op2Class,
@@ -3158,6 +3209,31 @@ multiclass VPseudoVWMAC_VV_VF_RM {
}
}
+multiclass VPseudoVWMAC_VV_VF_BF_RM {
+ foreach m = MxListFW in {
+ defvar mx = m.MX;
+ defvar WriteVFWMulAddV_MX = !cast<SchedWrite>("WriteVFWMulAddV_" # mx);
+ defvar ReadVFWMulAddV_MX = !cast<SchedRead>("ReadVFWMulAddV_" # mx);
+
+ defm "" : VPseudoTernaryW_VV_RM<m>,
+ Sched<[WriteVFWMulAddV_MX, ReadVFWMulAddV_MX,
+ ReadVFWMulAddV_MX, ReadVFWMulAddV_MX, ReadVMask]>;
+ }
+
+ foreach f = BFPListW in {
+ foreach m = f.MxListFW in {
+ defvar mx = m.MX;
+ defvar WriteVFWMulAddF_MX = !cast<SchedWrite>("WriteVFWMulAddF_" # mx);
+ defvar ReadVFWMulAddV_MX = !cast<SchedRead>("ReadVFWMulAddV_" # mx);
+ defvar ReadVFWMulAddF_MX = !cast<SchedRead>("ReadVFWMulAddF_" # mx);
+
+ defm "" : VPseudoTernaryW_VF_BF_RM<m, f>,
+ Sched<[WriteVFWMulAddF_MX, ReadVFWMulAddV_MX,
+ ReadVFWMulAddV_MX, ReadVFWMulAddF_MX, ReadVMask]>;
+ }
+ }
+}
+
multiclass VPseudoVCMPM_VV_VX_VI {
foreach m = MxList in {
defvar mx = m.MX;
@@ -5635,6 +5711,19 @@ multiclass VPatConversionWF_VF<string intrinsic, string instruction> {
}
}
+multiclass VPatConversionWF_VF_BF <string intrinsic, string instruction> {
+ foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in
+ {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ defm : VPatConversionTA<intrinsic, instruction, "V",
+ fwti.Vector, fvti.Vector, fwti.Mask, fvti.Log2SEW,
+ fvti.LMul, fwti.RegClass, fvti.RegClass>;
+ }
+}
+
multiclass VPatConversionVI_WF <string intrinsic, string instruction> {
foreach vtiToWti = AllWidenableIntToFloatVectors in {
defvar vti = vtiToWti.Vti;
@@ -5695,6 +5784,18 @@ multiclass VPatConversionVF_WF_RM <string intrinsic, string instruction> {
}
}
+multiclass VPatConversionVF_WF_BF_RM <string intrinsic, string instruction> {
+ foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ defm : VPatConversionTARoundingMode<intrinsic, instruction, "W",
+ fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW,
+ fvti.LMul, fvti.RegClass, fwti.RegClass>;
+ }
+}
+
multiclass VPatCompare_VI<string intrinsic, string inst,
ImmLeaf ImmType> {
foreach vti = AllIntegerVectors in {
@@ -6140,6 +6241,8 @@ defm PseudoVFWMACC : VPseudoVWMAC_VV_VF_RM;
defm PseudoVFWNMACC : VPseudoVWMAC_VV_VF_RM;
defm PseudoVFWMSAC : VPseudoVWMAC_VV_VF_RM;
defm PseudoVFWNMSAC : VPseudoVWMAC_VV_VF_RM;
+let Predicates = [HasStdExtZvfbfwma] in
+defm PseudoVFWMACCBF16 : VPseudoVWMAC_VV_VF_BF_RM;
}
//===----------------------------------------------------------------------===//
@@ -6244,6 +6347,7 @@ defm PseudoVFWCVT_F_XU : VPseudoVWCVTF_V;
defm PseudoVFWCVT_F_X : VPseudoVWCVTF_V;
defm PseudoVFWCVT_F_F : VPseudoVWCVTD_V;
+defm PseudoVFWCVTBF16_F_F : VPseudoVWCVTD_V;
} // mayRaiseFPException = true
//===----------------------------------------------------------------------===//
@@ -6269,6 +6373,7 @@ defm PseudoVFNCVT_RM_F_X : VPseudoVNCVTF_RM_W;
let hasSideEffects = 0, hasPostISelHook = 1 in
defm PseudoVFNCVT_F_F : VPseudoVNCVTD_W_RM;
+defm PseudoVFNCVTBF16_F_F : VPseudoVNCVTD_W_RM;
defm PseudoVFNCVT_ROD_F_F : VPseudoVNCVTD_W;
} // mayRaiseFPException = true
@@ -6804,7 +6909,10 @@ defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwnmacc", "PseudoVFWNMACC",
defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmsac", "PseudoVFWMSAC",
AllWidenableFloatVectors>;
defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwnmsac", "PseudoVFWNMSAC",
- AllWidenableFloatVectors>;
+ AllWidenableFloatVectors>;
+let Predicates = [HasStdExtZvfbfwma] in
+defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmaccbf16", "PseudoVFWMACCBF16",
+ AllWidenableBFloatToFloatVectors>;
//===----------------------------------------------------------------------===//
// 13.8. Vector Floating-Point Square-Root Instruction
@@ -6909,6 +7017,8 @@ defm : VPatConversionWI_VF<"int_riscv_vfwcvt_rtz_x_f_v", "PseudoVFWCVT_RTZ_X_F">
defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_xu_v", "PseudoVFWCVT_F_XU">;
defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_x_v", "PseudoVFWCVT_F_X">;
defm : VPatConversionWF_VF<"int_riscv_vfwcvt_f_f_v", "PseudoVFWCVT_F_F">;
+defm : VPatConversionWF_VF_BF<"int_riscv_vfwcvtbf16_f_f_v",
+ "PseudoVFWCVTBF16_F_F">;
//===----------------------------------------------------------------------===//
// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
@@ -6920,6 +7030,8 @@ defm : VPatConversionVI_WF<"int_riscv_vfncvt_rtz_x_f_w", "PseudoVFNCVT_RTZ_X_F">
defm : VPatConversionVF_WI_RM <"int_riscv_vfncvt_f_xu_w", "PseudoVFNCVT_F_XU">;
defm : VPatConversionVF_WI_RM <"int_riscv_vfncvt_f_x_w", "PseudoVFNCVT_F_X">;
defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F">;
+defm : VPatConversionVF_WF_BF_RM<"int_riscv_vfncvtbf16_f_f_w",
+ "PseudoVFNCVTBF16_F_F">;
defm : VPatConversionVF_WF<"int_riscv_vfncvt_rod_f_f_w", "PseudoVFNCVT_ROD_F_F">;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 603d220ff5f003..d16db5ce754391 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -883,15 +883,16 @@ multiclass VPatMultiplyAddSDNode_VV_VX<SDNode op, string instruction_name> {
// 7.4. Vector Unit-Stride Instructions
foreach vti = !listconcat(FractionalGroupIntegerVectors,
- FractionalGroupFloatVectors) in
+ FractionalGroupFloatVectors,
+ FractionalGroupBFloatVectors) in
let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatUSLoadStoreSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
vti.AVL, vti.RegClass>;
-foreach vti = [VI8M1, VI16M1, VI32M1, VI64M1, VF16M1, VF32M1, VF64M1] in
+foreach vti = [VI8M1, VI16M1, VI32M1, VI64M1, VBF16M1, VF16M1, VF32M1, VF64M1] in
let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatUSLoadStoreWholeVRSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
vti.RegClass>;
-foreach vti = !listconcat(GroupIntegerVectors, GroupFloatVectors) in
+foreach vti = !listconcat(GroupIntegerVectors, GroupFloatVectors, GroupBFloatVectors) in
let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatUSLoadStoreWholeVRSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
vti.RegClass>;
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index b23785cd230204..117668072bdc40 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -313,6 +313,13 @@ defvar vfloat16m2_t = nxv8f16;
defvar vfloat16m4_t = nxv16f16;
defvar vfloat16m8_t = nxv32f16;
+defvar vbfloat16mf4_t = nxv1bf16;
+defvar vbfloat16mf2_t = nxv2bf16;
+defvar vbfloat16m1_t = nxv4bf16;
+defvar vbfloat16m2_t = nxv8bf16;
+defvar vbfloat16m4_t = nxv16bf16;
+defvar vbfloat16m8_t = nxv32bf16;
+
defvar vfloat32mf2_t = nxv1f32;
defvar vfloat32m1_t = nxv2f32;
defvar vfloat32m2_t = nxv4f32;
@@ -489,19 +496,23 @@ defvar VMaskVTs = [vbool1_t, vbool2_t, vbool4_t, vbool8_t, vbool16_t,
vbool32_t, vbool64_t];
defvar VM1VTs = [vint8m1_t, vint16m1_t, vint32m1_t, vint64m1_t,
- vfloat16m1_t, vfloat32m1_t, vfloat64m1_t,
- vint8mf2_t, vint8mf4_t, vint8mf8_t,
- vint16mf2_t, vint16mf4_t, vint32mf2_t,
- vfloat16mf4_t, vfloat16mf2_t, vfloat32mf2_t];
+ vbfloat16m1_t, vfloat16m1_t, vfloat32m1_t,
+ vfloat64m1_t, vint8mf2_t, vint8mf4_t, vint8mf8_t,
+ vint16mf2_t, vint16mf4_t, vint32mf2_t,
+ vfloat16mf4_t, vfloat16mf2_t, vbfloat16mf4_t,
+ vbfloat16mf2_t, vfloat32mf2_t];
defvar VM2VTs = [vint8m2_t, vint16m2_t, vint32m2_t, vint64m2_t,
- vfloat16m2_t, vfloat32m2_t, vfloat64m2_t];
-
+ vfloat16m2_t, vbfloat16m2_t,
+ vfloat32m2_t, vfloat64m2_t];
+
defvar VM4VTs = [vint8m4_t, vint16m4_t, vint32m4_t, vint64m4_t,
- vfloat16m4_t, vfloat32m4_t, vfloat64m4_t];
-
+ vfloat16m4_t, vbfloat16m4_t,
+ vfloat32m4_t, vfloat64m4_t];
+
defvar VM8VTs = [vint8m8_t, vint16m8_t, vint32m8_t, vint64m8_t,
- vfloat16m8_t, vfloat32m8_t, vfloat64m8_t];
+ vfloat16m8_t, vbfloat16m8_t,
+ vfloat32m8_t, vfloat64m8_t];
def VR : VReg<!listconcat(VM1VTs, VMaskVTs),
(add (sequence "V%u", 8, 31),
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index a831beb7edd956..f51c0f74473a0d 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -168,6 +168,9 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
bool hasVInstructions() const { return HasStdExtZve32x; }
bool hasVInstructionsI64() const { return HasStdExtZve64x; }
bool hasVInstructionsF16() const { return HasStdExtZvfh; }
+ bool hasVInstructionsBF16() const {
+ return HasStdExtZvfbfmin || HasStdExtZvfbfwma;
+ }
// FIXME: Consider Zfinx in the future
bool hasVInstructionsF32() const { return HasStdExtZve32f && HasStdExtF; }
// FIXME: Consider Zdinx in the future
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvtbf16-f-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvtbf16-f-f.ll
new file mode 100644
index 00000000000000..16e44383df45ad
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvtbf16-f-f.ll
@@ -0,0 +1,219 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zfbfmin,+experimental-zvfbfmin \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zfbfmin,+experimental-zvfbfmin \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+declare <vscale x 1 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.nxv1bf16.nxv1f32(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x float>,
+ iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfncvtbf16_f.f.w_nxv1bf16_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvtbf16_f.f.w_nxv1bf16_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.nxv1bf16.nxv1f32(
+ <vscale x 1 x bfloat> undef,
+ <vscale x 1 x float> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.mask.nxv1bf16.nxv1f32(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x float>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfncvtbf16_mask_f.f.w_nxv1bf16_nxv1f32(<vscale x 1 x bfloat> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvtbf16_mask_f.f.w_nxv1bf16_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.mask.nxv1bf16.nxv1f32(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x float> %1,
+ <vscale x 1 x i1> %2,
+ iXLen 7, iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.nxv2bf16.nxv2f32(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x float>,
+ iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfncvtbf16_f.f.w_nxv2bf16_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvtbf16_f.f.w_nxv2bf16_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.nxv2bf16.nxv2f32(
+ <vscale x 2 x bfloat> undef,
+ <vscale x 2 x float> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.mask.nxv2bf16.nxv2f32(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x float>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfncvtbf16_mask_f.f.w_nxv2bf16_nxv2f32(<vscale x 2 x bfloat> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvtbf16_mask_f.f.w_nxv2bf16_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.mask.nxv2bf16.nxv2f32(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x float> %1,
+ <vscale x 2 x i1> %2,
+ iXLen 7, iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.nxv4bf16.nxv4f32(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x float>,
+ iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfncvtbf16_f.f.w_nxv4bf16_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvtbf16_f.f.w_nxv4bf16_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.nxv4bf16.nxv4f32(
+ <vscale x 4 x bfloat> undef,
+ <vscale x 4 x float> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.mask.nxv4bf16.nxv4f32(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x float>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfncvtbf16_mask_f.f.w_nxv4bf16_nxv4f32(<vscale x 4 x bfloat> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvtbf16_mask_f.f.w_nxv4bf16_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.mask.nxv4bf16.nxv4f32(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x float> %1,
+ <vscale x 4 x i1> %2,
+ iXLen 7, iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.nxv8bf16.nxv8f32(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x float>,
+ iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfncvtbf16_f.f.w_nxv8bf16_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvtbf16_f.f.w_nxv8bf16_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.nxv8bf16.nxv8f32(
+ <vscale x 8 x bfloat> undef,
+ <vscale x 8 x float> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.mask.nxv8bf16.nxv8f32(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x float>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfncvtbf16_mask_f.f.w_nxv8bf16_nxv8f32(<vscale x 8 x bfloat> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvtbf16_mask_f.f.w_nxv8bf16_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.mask.nxv8bf16.nxv8f32(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x float> %1,
+ <vscale x 8 x i1> %2,
+ iXLen 7, iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.nxv16bf16.nxv16f32(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x float>,
+ iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfncvtbf16_f.f.w_nxv16bf16_nxv16f32(<vscale x 16 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvtbf16_f.f.w_nxv16bf16_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v16, v8
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.nxv16bf16.nxv16f32(
+ <vscale x 16 x bfloat> undef,
+ <vscale x 16 x float> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.mask.nxv16bf16.nxv16f32(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x float>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfncvtbf16_mask_f.f.w_nxv16bf16_nxv16f32(<vscale x 16 x bfloat> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvtbf16_mask_f.f.w_nxv16bf16_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.mask.nxv16bf16.nxv16f32(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x float> %1,
+ <vscale x 16 x i1> %2,
+ iXLen 7, iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvtbf16-f-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvtbf16-f-f.ll
new file mode 100644
index 00000000000000..71f073458d785b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvtbf16-f-f.ll
@@ -0,0 +1,224 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zfbfmin,+experimental-zvfbfmin \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zfbfmin,+experimental-zvfbfmin \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+declare <vscale x 1 x float> @llvm.riscv.vfwcvtbf16.f.f.v.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwcvtbf16_f.f.v_nxv1f32_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvtbf16_f.f.v_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwcvtbf16.f.f.v.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> undef,
+ <vscale x 1 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwcvtbf16.f.f.v.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwcvtbf16_mask_f.f.v_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvtbf16_mask_f.f.v_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwcvtbf16.f.f.v.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwcvtbf16.f.f.v.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwcvtbf16_f.f.v_nxv2f32_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvtbf16_f.f.v_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwcvtbf16.f.f.v.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> undef,
+ <vscale x 2 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwcvtbf16.f.f.v.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwcvtbf16_mask_f.f.v_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvtbf16_mask_f.f.v_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwcvtbf16.f.f.v.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwcvtbf16.f.f.v.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwcvtbf16_f.f.v_nxv4f32_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvtbf16_f.f.v_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwcvtbf16.f.f.v.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> undef,
+ <vscale x 4 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwcvtbf16.f.f.v.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwcvtbf16_mask_f.f.v_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvtbf16_mask_f.f.v_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwcvtbf16.f.f.v.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwcvtbf16.f.f.v.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwcvtbf16_f.f.v_nxv8f32_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvtbf16_f.f.v_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwcvtbf16.f.f.v.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> undef,
+ <vscale x 8 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwcvtbf16.f.f.v.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwcvtbf16_mask_f.f.v_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvtbf16_mask_f.f.v_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwcvtbf16.f.f.v.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwcvtbf16.f.f.v.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwcvtbf16_f.f.v_nxv16f32_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvtbf16_f.f.v_nxv16f32_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwcvtbf16.f.f.v.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> undef,
+ <vscale x 16 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwcvtbf16.f.f.v.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwcvtbf16_mask_f.f.v_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvtbf16_mask_f.f.v_nxv16f32_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwcvtbf16.f.f.v.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x float> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmaccbf16.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmaccbf16.ll
new file mode 100644
index 00000000000000..86c39cf2cf4677
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmaccbf16.ll
@@ -0,0 +1,464 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zfbfmin,+experimental-zvfbfwma \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zfbfmin,+experimental-zvfbfwma \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+declare <vscale x 1 x float> @llvm.riscv.vfwmaccbf16.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmaccbf16_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmaccbf16_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
+; CHECK-NEXT: vfwmaccbf16.vv v8, v9, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwmaccbf16.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmaccbf16.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmaccbf16_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmaccbf16_mask_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT: vfwmaccbf16.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwmaccbf16.mask.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmaccbf16.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmaccbf16_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmaccbf16_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
+; CHECK-NEXT: vfwmaccbf16.vv v8, v9, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwmaccbf16.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmaccbf16.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmaccbf16_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmaccbf16_mask_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT: vfwmaccbf16.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwmaccbf16.mask.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmaccbf16.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmaccbf16_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmaccbf16_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vfwmaccbf16.vv v8, v10, v11
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwmaccbf16.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmaccbf16.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmaccbf16_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmaccbf16_mask_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT: vfwmaccbf16.vv v8, v10, v11, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwmaccbf16.mask.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmaccbf16.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmaccbf16_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmaccbf16_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma
+; CHECK-NEXT: vfwmaccbf16.vv v8, v12, v14
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwmaccbf16.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmaccbf16.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmaccbf16_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmaccbf16_mask_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT: vfwmaccbf16.vv v8, v12, v14, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwmaccbf16.mask.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmaccbf16.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmaccbf16_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmaccbf16_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma
+; CHECK-NEXT: vfwmaccbf16.vv v8, v16, v20
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwmaccbf16.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmaccbf16.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmaccbf16_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmaccbf16_mask_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT: vfwmaccbf16.vv v8, v16, v20, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwmaccbf16.mask.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmaccbf16.nxv1f32.bf16(
+ <vscale x 1 x float>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmaccbf16_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmaccbf16_vf_nxv1f32_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
+; CHECK-NEXT: vfwmaccbf16.vf v8, fa0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwmaccbf16.nxv1f32.bf16(
+ <vscale x 1 x float> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmaccbf16.mask.nxv1f32.bf16(
+ <vscale x 1 x float>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmaccbf16_mask_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmaccbf16_mask_vf_nxv1f32_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT: vfwmaccbf16.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwmaccbf16.mask.nxv1f32.bf16(
+ <vscale x 1 x float> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmaccbf16.nxv2f32.bf16(
+ <vscale x 2 x float>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmaccbf16_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmaccbf16_vf_nxv2f32_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
+; CHECK-NEXT: vfwmaccbf16.vf v8, fa0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwmaccbf16.nxv2f32.bf16(
+ <vscale x 2 x float> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmaccbf16.mask.nxv2f32.bf16(
+ <vscale x 2 x float>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmaccbf16_mask_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmaccbf16_mask_vf_nxv2f32_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT: vfwmaccbf16.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwmaccbf16.mask.nxv2f32.bf16(
+ <vscale x 2 x float> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmaccbf16.nxv4f32.bf16(
+ <vscale x 4 x float>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmaccbf16_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmaccbf16_vf_nxv4f32_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vfwmaccbf16.vf v8, fa0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwmaccbf16.nxv4f32.bf16(
+ <vscale x 4 x float> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmaccbf16.mask.nxv4f32.bf16(
+ <vscale x 4 x float>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmaccbf16_mask_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmaccbf16_mask_vf_nxv4f32_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT: vfwmaccbf16.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwmaccbf16.mask.nxv4f32.bf16(
+ <vscale x 4 x float> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmaccbf16.nxv8f32.bf16(
+ <vscale x 8 x float>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmaccbf16_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmaccbf16_vf_nxv8f32_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma
+; CHECK-NEXT: vfwmaccbf16.vf v8, fa0, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwmaccbf16.nxv8f32.bf16(
+ <vscale x 8 x float> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmaccbf16.mask.nxv8f32.bf16(
+ <vscale x 8 x float>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmaccbf16_mask_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmaccbf16_mask_vf_nxv8f32_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT: vfwmaccbf16.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwmaccbf16.mask.nxv8f32.bf16(
+ <vscale x 8 x float> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmaccbf16.nxv16f32.bf16(
+ <vscale x 16 x float>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmaccbf16_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmaccbf16_vf_nxv16f32_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma
+; CHECK-NEXT: vfwmaccbf16.vf v8, fa0, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwmaccbf16.nxv16f32.bf16(
+ <vscale x 16 x float> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmaccbf16.mask.nxv16f32.bf16(
+ <vscale x 16 x float>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmaccbf16_mask_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmaccbf16_mask_vf_nxv16f32_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT: vfwmaccbf16.vf v8, fa0, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfwmaccbf16.mask.nxv16f32.bf16(
+ <vscale x 16 x float> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 7, iXLen %4, iXLen 0)
+
+ ret <vscale x 16 x float> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vle.ll b/llvm/test/CodeGen/RISCV/rvv/vle.ll
index b2fd7c6ae5d9cf..5c724376b68e54 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vle.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vle.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+experimental-zfbfmin,+experimental-zvfbfmin \
; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+experimental-zfbfmin,+experimental-zvfbfmin \
; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64(
<vscale x 1 x i64>,
@@ -1293,6 +1293,264 @@ entry:
ret <vscale x 32 x half> %a
}
+declare <vscale x 1 x bfloat> @llvm.riscv.vle.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>*,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vle_v_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat>* %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vle.nxv1bf16(
+ <vscale x 1 x bfloat> undef,
+ <vscale x 1 x bfloat>* %0,
+ iXLen %1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vle.mask.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>*,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vle_mask_v_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat>* %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT: vle16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vle.mask.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat>* %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vle.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>*,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vle_v_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat>* %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vle.nxv2bf16(
+ <vscale x 2 x bfloat> undef,
+ <vscale x 2 x bfloat>* %0,
+ iXLen %1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vle.mask.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>*,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vle_mask_v_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat>* %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT: vle16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vle.mask.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat>* %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vle.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>*,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vle_v_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat>* %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vle.nxv4bf16(
+ <vscale x 4 x bfloat> undef,
+ <vscale x 4 x bfloat>* %0,
+ iXLen %1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vle.mask.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>*,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vle_mask_v_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat>* %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT: vle16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vle.mask.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat>* %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vle.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>*,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vle_v_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat>* %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vle.nxv8bf16(
+ <vscale x 8 x bfloat> undef,
+ <vscale x 8 x bfloat>* %0,
+ iXLen %1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vle.mask.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>*,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vle_mask_v_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat>* %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT: vle16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vle.mask.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat>* %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vle.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>*,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vle_v_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat>* %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vle.nxv16bf16(
+ <vscale x 16 x bfloat> undef,
+ <vscale x 16 x bfloat>* %0,
+ iXLen %1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vle.mask.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>*,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vle_mask_v_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat>* %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT: vle16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vle.mask.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat>* %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vle.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>*,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vle_v_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat>* %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vle.nxv32bf16(
+ <vscale x 32 x bfloat> undef,
+ <vscale x 32 x bfloat>* %0,
+ iXLen %1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vle.mask.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>*,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vle_mask_v_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat>* %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
+; CHECK-NEXT: vle16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vle.mask.nxv32bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat>* %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
declare <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8(
<vscale x 1 x i8>,
<vscale x 1 x i8>*,
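For context: in the masked vle tests above, the trailing iXLen operand is the
tail/mask policy. Bit 0 selects a tail-agnostic and bit 1 a mask-agnostic
lowering, so the iXLen 1 passed in each call is what yields the "ta, mu" in
the generated vsetvli. A minimal sketch (not from the patch itself), reusing
the llvm.riscv.vle.mask.nxv1bf16 declaration above and the file's iXLen
convention, that would instead request a fully agnostic "ta, ma" policy:

define <vscale x 1 x bfloat> @sketch_vle_mask_ta_ma(<vscale x 1 x bfloat> %pt, <vscale x 1 x bfloat>* %p, <vscale x 1 x i1> %m, iXLen %vl) nounwind {
entry:
  ; Policy 3 = TAIL_AGNOSTIC | MASK_AGNOSTIC; the tests above pass 1 (ta, mu).
  %v = call <vscale x 1 x bfloat> @llvm.riscv.vle.mask.nxv1bf16(
    <vscale x 1 x bfloat> %pt,
    <vscale x 1 x bfloat>* %p,
    <vscale x 1 x i1> %m,
    iXLen %vl, iXLen 3)
  ret <vscale x 1 x bfloat> %v
}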
diff --git a/llvm/test/CodeGen/RISCV/rvv/vse.ll b/llvm/test/CodeGen/RISCV/rvv/vse.ll
index bd35e4850ce5d5..d789ff7164e527 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vse.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+experimental-zfbfmin,+experimental-zvfbfmin \
; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+experimental-zfbfmin,+experimental-zvfbfmin \
; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
declare void @llvm.riscv.vse.nxv1i64(
<vscale x 1 x i64>,
@@ -1263,6 +1263,258 @@ entry:
ret void
}
+declare void @llvm.riscv.vse.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>*,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat>* %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat>* %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>*,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat>* %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat>* %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>*,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat>* %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat>* %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>*,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat>* %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat>* %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>*,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat>* %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat>* %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>*,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat>* %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat>* %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>*,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat>* %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat>* %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>*,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat>* %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat>* %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>*,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat>* %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat>* %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>*,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat>* %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat>* %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>*,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat>* %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv32bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat>* %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>*,
+ <vscale x 32 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat>* %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv32bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat>* %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
declare void @llvm.riscv.vse.nxv1i8(
<vscale x 1 x i8>,
<vscale x 1 x i8>*,
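Finally, a small standalone sketch (not one of the tests added by this
commit) that exercises the new bf16 load and store lowering together; it
can be compiled with the same features as the RUN lines above, e.g.
llc -mtriple=riscv64 -mattr=+v,+experimental-zfbfmin,+experimental-zvfbfmin
-verify-machineinstrs. The declarations are copied from vle.ll/vse.ll, with
iXLen spelled out as i64 for riscv64:

declare <vscale x 4 x bfloat> @llvm.riscv.vle.nxv4bf16(
  <vscale x 4 x bfloat>,
  <vscale x 4 x bfloat>*,
  i64)

declare void @llvm.riscv.vse.nxv4bf16(
  <vscale x 4 x bfloat>,
  <vscale x 4 x bfloat>*,
  i64)

define void @copy_nxv4bf16(<vscale x 4 x bfloat>* %src, <vscale x 4 x bfloat>* %dst, i64 %vl) nounwind {
entry:
  ; Expected to lower to a single vsetvli (e16, m1) followed by vle16.v/vse16.v,
  ; matching the nxv4bf16 CHECK lines in the tests above.
  %v = call <vscale x 4 x bfloat> @llvm.riscv.vle.nxv4bf16(
    <vscale x 4 x bfloat> undef,
    <vscale x 4 x bfloat>* %src,
    i64 %vl)
  call void @llvm.riscv.vse.nxv4bf16(
    <vscale x 4 x bfloat> %v,
    <vscale x 4 x bfloat>* %dst,
    i64 %vl)
  ret void
}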