[clang] 510fd28 - [NVPTX] Add ex2.approx.f16/f16x2 support
Artem Belevich via cfe-commits
cfe-commits at lists.llvm.org
Tue Mar 1 11:08:04 PST 2022
Author: Nicolas Miller
Date: 2022-03-01T11:07:11-08:00
New Revision: 510fd283fda2d7c5118ae1b451a1f2365cfc3f27
URL: https://github.com/llvm/llvm-project/commit/510fd283fda2d7c5118ae1b451a1f2365cfc3f27
DIFF: https://github.com/llvm/llvm-project/commit/510fd283fda2d7c5118ae1b451a1f2365cfc3f27.diff
LOG: [NVPTX] Add ex2.approx.f16/f16x2 support
NOTE: this is a follow-up commit with the missing clang-side changes.
This patch adds builtins and intrinsics for the f16 and f16x2 variants of the ex2
instruction.
These two variants were added in PTX 7.0 and are supported on sm_75 and above.
Note that this isn't wired up to the exp2 LLVM intrinsic because the ex2
instruction is only available in its approx variant.
Running ptxas on the assembly generated by the test f16-ex2.ll works as
expected.
Differential Revision: https://reviews.llvm.org/D119157
Added:
Modified:
clang/include/clang/Basic/BuiltinsNVPTX.def
clang/test/CodeGen/builtins-nvptx-native-half-type.c
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def
index a058925a6a5f6..04bed16c9958e 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.def
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -282,6 +282,8 @@ BUILTIN(__nvvm_saturate_d, "dd", "")
BUILTIN(__nvvm_ex2_approx_ftz_f, "ff", "")
BUILTIN(__nvvm_ex2_approx_f, "ff", "")
BUILTIN(__nvvm_ex2_approx_d, "dd", "")
+TARGET_BUILTIN(__nvvm_ex2_approx_f16, "hh", "", AND(SM_75, PTX70))
+TARGET_BUILTIN(__nvvm_ex2_approx_f16x2, "V2hV2h", "", AND(SM_75, PTX70))
BUILTIN(__nvvm_lg2_approx_ftz_f, "ff", "")
BUILTIN(__nvvm_lg2_approx_f, "ff", "")
diff --git a/clang/test/CodeGen/builtins-nvptx-native-half-type.c b/clang/test/CodeGen/builtins-nvptx-native-half-type.c
index c232c4de5640a..95021f274cd0f 100644
--- a/clang/test/CodeGen/builtins-nvptx-native-half-type.c
+++ b/clang/test/CodeGen/builtins-nvptx-native-half-type.c
@@ -1,4 +1,9 @@
// REQUIRES: nvptx-registered-target
+//
+// RUN: %clang_cc1 -ffp-contract=off -triple nvptx-unknown-unknown -target-cpu \
+// RUN: sm_75 -target-feature +ptx70 -fcuda-is-device -fnative-half-type -S \
+// RUN: -emit-llvm -o - -x cuda %s \
+// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX70_SM75 %s
// RUN: %clang_cc1 -ffp-contract=off -triple nvptx-unknown-unknown -target-cpu \
// RUN: sm_80 -target-feature +ptx70 -fcuda-is-device -fnative-half-type -S \
@@ -32,6 +37,16 @@
#define __device__ __attribute__((device))
+__device__ void nvvm_ex2_sm75() {
+#if __CUDA_ARCH__ >= 750
+ // CHECK_PTX70_SM75: call half @llvm.nvvm.ex2.approx.f16
+ __nvvm_ex2_approx_f16(0.1f16);
+ // CHECK_PTX70_SM75: call <2 x half> @llvm.nvvm.ex2.approx.f16x2
+ __nvvm_ex2_approx_f16x2({0.1f16, 0.7f16});
+#endif
+ // CHECK: ret void
+}
+
// CHECK-LABEL: nvvm_min_max_sm80
__device__ void nvvm_min_max_sm80() {
#if __CUDA_ARCH__ >= 800
More information about the cfe-commits mailing list