[PATCH] D119157: [NVPTX] Add ex2 f16 support
Nicolas Miller via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Mon Feb 7 09:46:01 PST 2022
npmiller created this revision.
Herald added subscribers: asavonic, hiraditya, jholewinski.
npmiller requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, jdoerfert.
Herald added projects: clang, LLVM.
This patch adds builtins and intrinsics for the `f16` and `f16x2` variants of the `ex2` instruction.
These two variants were added in PTX7.0, and are supported by sm_75 and above.
Note that this isn't wired with the `exp2` llvm intrinsic because the `ex2` instruction is only available in its `approx` variant.
Running `ptxas` on the assembly generated by the test `f16-ex2.ll` works as expected.
Depends on D118977 <https://reviews.llvm.org/D118977>
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D119157
Files:
clang/include/clang/Basic/BuiltinsNVPTX.def
clang/test/CodeGen/builtins-nvptx-native-half-type.c
llvm/include/llvm/IR/IntrinsicsNVVM.td
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
llvm/test/CodeGen/NVPTX/f16-ex2.ll
Index: llvm/test/CodeGen/NVPTX/f16-ex2.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/NVPTX/f16-ex2.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_75 -mattr=+ptx70 | FileCheck %s
+
+declare half @llvm.nvvm.ex2.approx.f16(half)
+declare <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half>)
+
+; CHECK-LABEL: exp2_half
+define half @exp2_half(half %0) {
+ ; CHECK-NOT: call
+ ; CHECK: ex2.approx.f16
+ %res = call half @llvm.nvvm.ex2.approx.f16(half %0);
+ ret half %res
+}
+
+; CHECK-LABEL: exp2_2xhalf
+define <2 x half> @exp2_2xhalf(<2 x half> %0) {
+ ; CHECK-NOT: call
+ ; CHECK: ex2.approx.f16x2
+ %res = call <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half> %0);
+ ret <2 x half> %res
+}
Index: llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -907,6 +907,10 @@
Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
+def INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;",
+ Float16Regs, Float16Regs, int_nvvm_ex2_approx_f16, [hasPTX70, hasSM75]>;
+def INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;",
+ Float16x2Regs, Float16x2Regs, int_nvvm_ex2_approx_f16x2, [hasPTX70, hasSM75]>;
def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
Index: llvm/include/llvm/IR/IntrinsicsNVVM.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -828,6 +828,10 @@
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_ex2_approx_d : GCCBuiltin<"__nvvm_ex2_approx_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_ex2_approx_f16 : GCCBuiltin<"__nvvm_ex2_approx_f16">,
+ DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty], [IntrNoMem]>;
+ def int_nvvm_ex2_approx_f16x2 : GCCBuiltin<"__nvvm_ex2_approx_f16x2">,
+ DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty], [IntrNoMem]>;
def int_nvvm_lg2_approx_ftz_f : GCCBuiltin<"__nvvm_lg2_approx_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
Index: clang/test/CodeGen/builtins-nvptx-native-half-type.c
===================================================================
--- clang/test/CodeGen/builtins-nvptx-native-half-type.c
+++ clang/test/CodeGen/builtins-nvptx-native-half-type.c
@@ -1,4 +1,9 @@
// REQUIRES: nvptx-registered-target
+//
+// RUN: %clang_cc1 -ffp-contract=off -triple nvptx-unknown-unknown -target-cpu \
+// RUN: sm_75 -target-feature +ptx70 -fcuda-is-device -fnative-half-type -S \
+// RUN: -emit-llvm -o - -x cuda %s \
+// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX70_SM75 %s
// RUN: %clang_cc1 -ffp-contract=off -triple nvptx-unknown-unknown -target-cpu \
// RUN: sm_80 -target-feature +ptx70 -fcuda-is-device -fnative-half-type -S \
@@ -32,6 +37,16 @@
#define __device__ __attribute__((device))
+__device__ void nvvm_ex2_sm75() {
+#if __CUDA_ARCH__ >= 750
+ // CHECK_PTX70_SM75: call half @llvm.nvvm.ex2.approx.f16
+ __nvvm_ex2_approx_f16(0.1f16);
+ // CHECK_PTX70_SM75: call <2 x half> @llvm.nvvm.ex2.approx.f16x2
+ __nvvm_ex2_approx_f16x2({0.1f16, 0.7f16});
+#endif
+ // CHECK: ret void
+}
+
// CHECK-LABEL: nvvm_min_max_sm80
__device__ void nvvm_min_max_sm80() {
#if __CUDA_ARCH__ >= 800
Index: clang/include/clang/Basic/BuiltinsNVPTX.def
===================================================================
--- clang/include/clang/Basic/BuiltinsNVPTX.def
+++ clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -278,6 +278,8 @@
BUILTIN(__nvvm_ex2_approx_ftz_f, "ff", "")
BUILTIN(__nvvm_ex2_approx_f, "ff", "")
BUILTIN(__nvvm_ex2_approx_d, "dd", "")
+TARGET_BUILTIN(__nvvm_ex2_approx_f16, "hh", "", AND(SM_75, PTX70))
+TARGET_BUILTIN(__nvvm_ex2_approx_f16x2, "V2hV2h", "", AND(SM_75, PTX70))
BUILTIN(__nvvm_lg2_approx_ftz_f, "ff", "")
BUILTIN(__nvvm_lg2_approx_f, "ff", "")
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D119157.406507.patch
Type: text/x-patch
Size: 4318 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20220207/311f3cdc/attachment.bin>
More information about the cfe-commits
mailing list