[clang] [llvm] [X86][AVX10.2] Support AVX10.2-BF16 new instructions. (PR #101603)
Phoebe Wang via cfe-commits
cfe-commits at lists.llvm.org
Tue Aug 27 06:50:35 PDT 2024
================
@@ -910,3 +910,313 @@ multiclass avx10_convert_2op_nomb<string OpcodeStr, AVX512VLVectorVTInfo _dest,
defm VCVTHF82PH : avx10_convert_2op_nomb<"vcvthf82ph", avx512vl_f16_info,
avx512vl_i8_info, 0x1e, X86vcvthf82ph>,
AVX512XDIi8Base, T_MAP5, EVEX, EVEX_CD8<16, CD8VH>;
+
+//-------------------------------------------------
+// AVX10 BF16 instructions
+//-------------------------------------------------
+
+// VADDNEPBF16, VSUBNEPBF16, VMULNEPBF16, VDIVNEPBF16, VMAXPBF16, VMINPBF16
+// Packed BF16 binary operations whose selection patterns use target
+// intrinsics rather than generic nodes. For each vector width the intrinsic
+// record is looked up by name as
+//   "int_x86_avx10_" # OpcodeStr # "pbf16" # {512,128,256}
+// and passed to avx512_fp_packed in both operator positions.
+//   opc          - 8-bit value forwarded as the first avx512_fp_packed argument.
+//   OpcodeStr    - mnemonic stem; also used to build the intrinsic name.
+//   sched        - scheduling sizes; the PH.ZMM / PH.XMM / PH.YMM entries are used.
+//   IsCommutable - forwarded unchanged to avx512_fp_packed (defaults to 0).
+// The 512-bit definition is guarded by HasAVX10_2_512; the 128- and 256-bit
+// definitions are guarded by HasAVX10_2.
+multiclass avx10_fp_binopne_int_pbf16<bits<8> opc, string OpcodeStr,
+ X86SchedWriteSizes sched,
+ bit IsCommutable = 0> {
+ let Predicates = [HasAVX10_2_512] in
+ defm PBF16Z : avx512_fp_packed<opc, OpcodeStr,
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16512"),
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16512"),
+ v32bf16_info, sched.PH.ZMM, IsCommutable>, EVEX_V512,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ let Predicates = [HasAVX10_2] in {
+ defm PBF16Z128 : avx512_fp_packed<opc, OpcodeStr,
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16128"),
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16128"),
+ v8bf16x_info, sched.PH.XMM, IsCommutable>, EVEX_V128,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ defm PBF16Z256 : avx512_fp_packed<opc, OpcodeStr,
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16256"),
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16256"),
+ v16bf16x_info, sched.PH.YMM, IsCommutable>, EVEX_V256,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ }
+}
+
+// Packed BF16 binary operations whose selection patterns use a pattern
+// operator supplied by the caller. Defines NEPBF16Z (guarded by
+// HasAVX10_2_512) and NEPBF16Z128 / NEPBF16Z256 (guarded by HasAVX10_2), each
+// an instantiation of avx512_fp_packed.
+//   opc          - 8-bit value forwarded as the first avx512_fp_packed argument.
+//   OpcodeStr    - mnemonic string forwarded to avx512_fp_packed.
+//   OpNode       - pattern operator forwarded to avx512_fp_packed.
+//   sched        - scheduling sizes; the PH.ZMM / PH.XMM / PH.YMM entries are used.
+//   IsCommutable - forwarded unchanged to avx512_fp_packed (defaults to 0).
+//   MaskOpNode   - second pattern operator forwarded to avx512_fp_packed;
+//                  defaults to OpNode.
+multiclass avx10_fp_binop_pbf16<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ X86SchedWriteSizes sched,
+ bit IsCommutable = 0,
+ SDPatternOperator MaskOpNode = OpNode> {
+ let Predicates = [HasAVX10_2_512] in
+ defm NEPBF16Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
+ v32bf16_info, sched.PH.ZMM, IsCommutable>, EVEX_V512,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ let Predicates = [HasAVX10_2] in {
+ defm NEPBF16Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
+ v8bf16x_info, sched.PH.XMM, IsCommutable>, EVEX_V128,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ defm NEPBF16Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
+ v16bf16x_info, sched.PH.YMM, IsCommutable>, EVEX_V256,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ }
+}
+
+// Instantiate the packed BF16 arithmetic and min/max instructions. Every
+// record defined inside this block gets an empty Uses list and
+// mayRaiseFPException = 0.
+// VADD/VSUB/VMUL/VDIV use avx10_fp_binop_pbf16 with the generic
+// fadd/fsub/fmul/fdiv operators; VMIN/VMAX use avx10_fp_binopne_int_pbf16,
+// which selects through intrinsics. The trailing argument is IsCommutable.
+// NOTE(review): VMUL passes IsCommutable = 0 while VADD passes 1 - confirm
+// whether the multiply was meant to be marked commutable as well.
+let Uses = []<Register>, mayRaiseFPException = 0 in {
+defm VADD : avx10_fp_binop_pbf16<0x58, "vaddne", fadd, SchedWriteFAddSizes, 1>;
+defm VSUB : avx10_fp_binop_pbf16<0x5C, "vsubne", fsub, SchedWriteFAddSizes, 0>;
+defm VMUL : avx10_fp_binop_pbf16<0x59, "vmulne", fmul, SchedWriteFMulSizes, 0>;
+defm VDIV : avx10_fp_binop_pbf16<0x5E, "vdivne", fdiv, SchedWriteFDivSizes, 0>;
+defm VMIN : avx10_fp_binopne_int_pbf16<0x5D, "vmin", SchedWriteFCmpSizes, 0>;
+defm VMAX : avx10_fp_binopne_int_pbf16<0x5F, "vmax", SchedWriteFCmpSizes, 0>;
+}
+
+// VCOMSBF16
+// Two sets of definitions share the VCOMSBF16Z prefix and opcode 0x2F:
+//  * sse12_ord_cmp on FR16X / bf16 with null_frag as the pattern operator;
+//  * sse12_ord_cmp_int on VR128X / v8bf16 with X86comi, marked isCodeGenOnly.
+// All records here declare Defs = [EFLAGS], an empty Uses list and
+// mayRaiseFPException = 0.
+// NOTE(review): this block is gated on HasAVX10_2_512, whereas the other
+// blocks in this patch use that predicate only for their 512-bit (ZMM)
+// forms and HasAVX10_2 otherwise; these definitions use FR16X / VR128X -
+// confirm whether HasAVX10_2 was intended.
+let Uses = []<Register>, mayRaiseFPException = 0,
+ Defs = [EFLAGS], Predicates = [HasAVX10_2_512] in {
+ defm VCOMSBF16Z : sse12_ord_cmp<0x2F, FR16X, null_frag, bf16, f16mem, loadf16,
+ "comsbf16", SSEPackedSingle>, T_MAP5, PD, EVEX,
+ VEX_LIG, EVEX_CD8<16, CD8VT1>;
+
+ let isCodeGenOnly = 1 in {
+ defm VCOMSBF16Z : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8bf16, f16mem,
+ sse_load_bf16, "comsbf16", SSEPackedSingle>,
+ T_MAP5, PD, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
+ }
+}
+
+// VCMPPBF16
+// Compare-into-mask definitions for one BF16 vector type `_`, all built on
+// AVX512_maskable_cmp with opcode 0xC2, mnemonic "vcmp" # _.Suffix, and a
+// u8imm condition-code operand $cc. The result is written to _.KRC:$dst.
+// Each form supplies an X86cmpm pattern followed by an X86cmpm_su pattern.
+//   rri  - both sources in registers (MRMSrcReg).
+//   rmi  - second source loaded from memory through _.LdFrag (MRMSrcMem).
+//   rmbi - second source is a broadcast load through _.BroadcastLdFrag of a
+//          _.ScalarMemOp operand; tagged EVEX_B and printed with
+//          _.BroadcastStr appended to $src2.
+//   sched - scheduling class: used directly for rri, and as
+//           sched.Folded / sched.ReadAfterFold for the memory forms.
+// All three forms are defined with mayRaiseFPException = 0.
+multiclass avx10_vcmp_common_bf16<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
+ let mayRaiseFPException = 0 in {
+ // NOTE(review): the trailing `1` is an extra AVX512_maskable_cmp argument
+ // given only for the register form - presumably the commutable flag;
+ // confirm against the AVX512_maskable_cmp definition.
+ defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, $src2, $src1", "$src1, $src2, $cc",
+ (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
+ (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
+ 1>, Sched<[sched]>;
+
+ defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
+ (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, $src2, $src1", "$src1, $src2, $cc",
+ (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
+ timm:$cc),
+ (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
+ timm:$cc)>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
+
+ defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, ${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr#", $cc",
+ (X86cmpm (_.VT _.RC:$src1),
+ (_.VT (_.BroadcastLdFrag addr:$src2)),
+ timm:$cc),
+ (X86cmpm_su (_.VT _.RC:$src1),
+ (_.VT (_.BroadcastLdFrag addr:$src2)),
+ timm:$cc)>,
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ }
+}
+
+// Expands avx10_vcmp_common_bf16 once per vector width:
+//   Z    - _.info512 with sched.ZMM, EVEX_V512, guarded by HasAVX10_2_512;
+//   Z128 - _.info128 with sched.XMM, EVEX_V128, guarded by HasAVX10_2;
+//   Z256 - _.info256 with sched.YMM, EVEX_V256, guarded by HasAVX10_2.
+multiclass avx10_vcmp_bf16<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
+ let Predicates = [HasAVX10_2_512] in
+ defm Z : avx10_vcmp_common_bf16<sched.ZMM, _.info512>, EVEX_V512;
+ let Predicates = [HasAVX10_2] in {
+ defm Z128 : avx10_vcmp_common_bf16<sched.XMM, _.info128>, EVEX_V128;
+ defm Z256 : avx10_vcmp_common_bf16<sched.YMM, _.info256>, EVEX_V256;
+ }
+}
+
+// Instantiate the BF16 compare for every width described by
+// avx512vl_bf16_info, using SchedWriteFCmp for scheduling. The encoding
+// classes listed after the multiclass apply to every generated record.
+defm VCMPPBF16 : avx10_vcmp_bf16<SchedWriteFCmp, avx512vl_bf16_info>,
+ AVX512XDIi8Base, EVEX, VVVV, EVEX_CD8<16, CD8VF>, TA;
+
+
+// VSQRTNEPBF16
+multiclass avx10_sqrt_packed_bf16<bits<8> opc, string OpcodeStr,
+ X86SchedWriteSizes sched> {
+ let Predicates = [HasAVX10_2_512] in
+ defm NEPBF16Z : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pbf16"),
----------------
phoebewang wrote:
`NEPBF16` can be moved out.
https://github.com/llvm/llvm-project/pull/101603
More information about the cfe-commits
mailing list