[clang] 6d614a8 - Summary:
Konstantin Pyzhov via cfe-commits
cfe-commits at lists.llvm.org
Tue Jan 28 08:48:50 PST 2020
Author: Konstantin Pyzhov
Date: 2020-01-28T03:51:27-05:00
New Revision: 6d614a82a4230ea69e322f56dc18dcbd815ed37b
URL: https://github.com/llvm/llvm-project/commit/6d614a82a4230ea69e322f56dc18dcbd815ed37b
DIFF: https://github.com/llvm/llvm-project/commit/6d614a82a4230ea69e322f56dc18dcbd815ed37b.diff
LOG: Summary:
This CL adds Clang declarations of built-in functions for the AMDGPU MFMA intrinsics and instructions.
OpenCL tests for the new built-ins are included.
Differential Revision: https://reviews.llvm.org/D72723
Added:
clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
clang/test/SemaOpenCL/builtins-amdgcn-error-gfx908-param.cl
Modified:
clang/include/clang/Basic/BuiltinsAMDGPU.def
clang/lib/Basic/Targets/AMDGPU.cpp
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 9b3a0f96798f..a9143ad8292c 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -212,5 +212,30 @@ BUILTIN(__builtin_r600_read_tidig_z, "Ui", "nc")
BUILTIN(__builtin_r600_recipsqrt_ieee, "dd", "nc")
BUILTIN(__builtin_r600_recipsqrt_ieeef, "ff", "nc")
+//===----------------------------------------------------------------------===//
+// MFMA builtins.
+//===----------------------------------------------------------------------===//
+
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x1f32, "V32fffV32fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x1f32, "V16fffV16fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_4x4x1f32, "V4fffV4fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x2f32, "V16fffV16fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x4f32, "V4fffV4fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x4f16, "V32fV4hV4hV32fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x4f16, "V16fV4hV4hV16fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_4x4x4f16, "V4fV4hV4hV4fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x8f16, "V16fV4hV4hV16fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x16f16, "V4fV4hV4hV4fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_32x32x4i8, "V32iiiV32iIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_16x16x4i8, "V16iiiV16iIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_4x4x4i8, "V4iiiV4iIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_32x32x8i8, "V16iiiV16iIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_16x16x16i8, "V4iiiV4iIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x2bf16, "V32fV2sV2sV32fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x2bf16, "V16fV2sV2sV16fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_4x4x2bf16, "V4fV2sV2sV4fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x4bf16, "V16fV2sV2sV16fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x8bf16, "V4fV2sV2sV4fIiIiIi", "nc", "mai-insts")
+
#undef BUILTIN
#undef TARGET_BUILTIN
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index 249a123ea605..0aaf6813442a 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -163,6 +163,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
Features["dot4-insts"] = true;
Features["dot5-insts"] = true;
Features["dot6-insts"] = true;
+ Features["mai-insts"] = true;
LLVM_FALLTHROUGH;
case GK_GFX906:
Features["dl-insts"] = true;
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx908-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx908-param.cl
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 71cea8c1f3d5..68e8a830ecac 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1725,105 +1725,125 @@ def int_amdgcn_buffer_atomic_fadd : AMDGPUBufferAtomicNoRtn;
def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicNoRtn;
// llvm.amdgcn.mfma.f32.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
-def int_amdgcn_mfma_f32_32x32x1f32 : Intrinsic<[llvm_v32f32_ty],
- [llvm_float_ty, llvm_float_ty, llvm_v32f32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_16x16x1f32 : Intrinsic<[llvm_v16f32_ty],
- [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_4x4x1f32 : Intrinsic<[llvm_v4f32_ty],
- [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_32x32x2f32 : Intrinsic<[llvm_v16f32_ty],
- [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_16x16x4f32 : Intrinsic<[llvm_v4f32_ty],
- [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_32x32x4f16 : Intrinsic<[llvm_v32f32_ty],
- [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v32f32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_16x16x4f16 : Intrinsic<[llvm_v16f32_ty],
- [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_4x4x4f16 : Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_32x32x8f16 : Intrinsic<[llvm_v16f32_ty],
- [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_16x16x16f16 : Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_i32_32x32x4i8 : Intrinsic<[llvm_v32i32_ty],
- [llvm_i32_ty, llvm_i32_ty, llvm_v32i32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_i32_16x16x4i8 : Intrinsic<[llvm_v16i32_ty],
- [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_i32_4x4x4i8 : Intrinsic<[llvm_v4i32_ty],
- [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_i32_32x32x8i8 : Intrinsic<[llvm_v16i32_ty],
- [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_i32_16x16x16i8 : Intrinsic<[llvm_v4i32_ty],
- [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_32x32x2bf16 : Intrinsic<[llvm_v32f32_ty],
- [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v32f32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_16x16x2bf16 : Intrinsic<[llvm_v16f32_ty],
- [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_4x4x2bf16 : Intrinsic<[llvm_v4f32_ty],
- [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_32x32x4bf16 : Intrinsic<[llvm_v16f32_ty],
- [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_16x16x8bf16 : Intrinsic<[llvm_v4f32_ty],
- [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+def int_amdgcn_mfma_f32_32x32x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x1f32">,
+ Intrinsic<[llvm_v32f32_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_v32f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_16x16x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x1f32">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_4x4x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x1f32">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_32x32x2f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x2f32">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_16x16x4f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x4f32">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_32x32x4f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x4f16">,
+ Intrinsic<[llvm_v32f32_ty],
+ [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v32f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_16x16x4f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x4f16">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_4x4x4f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x4f16">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_32x32x8f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x8f16">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_16x16x16f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x16f16">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_i32_32x32x4i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_32x32x4i8">,
+ Intrinsic<[llvm_v32i32_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_v32i32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_i32_16x16x4i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_16x16x4i8">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_i32_4x4x4i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_4x4x4i8">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_i32_32x32x8i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_32x32x8i8">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_i32_16x16x16i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_16x16x16i8">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_32x32x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x2bf16">,
+ Intrinsic<[llvm_v32f32_ty],
+ [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v32f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_16x16x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x2bf16">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_4x4x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x2bf16">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_32x32x4bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x4bf16">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_16x16x8bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x8bf16">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
//===----------------------------------------------------------------------===//
// Special Intrinsics for backend internal use only. No frontend
More information about the cfe-commits mailing list