[llvm] 71b0658 - [NVPTX] Add f16 and v2f16 ldg builtins
Jakub Chlanda via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 3 03:49:31 PST 2023
Author: Jakub Chlanda
Date: 2023-03-03T12:49:18+01:00
New Revision: 71b06585857a77691761a7bfd16b5b91454a6894
URL: https://github.com/llvm/llvm-project/commit/71b06585857a77691761a7bfd16b5b91454a6894
DIFF: https://github.com/llvm/llvm-project/commit/71b06585857a77691761a7bfd16b5b91454a6894.diff
LOG: [NVPTX] Add f16 and v2f16 ldg builtins
Adds f16 and v2f16 ldg builtins and relevant tests.
Differential Revision: https://reviews.llvm.org/D144961
Added:
Modified:
clang/include/clang/Basic/BuiltinsNVPTX.def
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/builtins-nvptx-native-half-type.c
llvm/test/CodeGen/NVPTX/ldu-ldg.ll
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def
index ea0cd8c3e8431..7fcd906c599b8 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.def
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -795,6 +795,7 @@ BUILTIN(__nvvm_ldg_ui, "UiUiC*", "")
BUILTIN(__nvvm_ldg_ul, "ULiULiC*", "")
BUILTIN(__nvvm_ldg_ull, "ULLiULLiC*", "")
+BUILTIN(__nvvm_ldg_h, "hhC*", "")
BUILTIN(__nvvm_ldg_f, "ffC*", "")
BUILTIN(__nvvm_ldg_d, "ddC*", "")
@@ -814,6 +815,7 @@ BUILTIN(__nvvm_ldg_ui2, "E2UiE2UiC*", "")
BUILTIN(__nvvm_ldg_ui4, "E4UiE4UiC*", "")
BUILTIN(__nvvm_ldg_ull2, "E2ULLiE2ULLiC*", "")
+BUILTIN(__nvvm_ldg_h2, "E2hE2hC*", "")
BUILTIN(__nvvm_ldg_f2, "E2fE2fC*", "")
BUILTIN(__nvvm_ldg_f4, "E4fE4fC*", "")
BUILTIN(__nvvm_ldg_d2, "E2dE2dC*", "")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 1535b14c7fb40..07a39bca9d7a2 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18228,7 +18228,9 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
// elements, its alignment is set to number of elements times the alignment
// of its member: n*alignof(t)."
return MakeLdg(Intrinsic::nvvm_ldg_global_i);
+ case NVPTX::BI__nvvm_ldg_h:
case NVPTX::BI__nvvm_ldg_f:
+ case NVPTX::BI__nvvm_ldg_h2:
case NVPTX::BI__nvvm_ldg_f2:
case NVPTX::BI__nvvm_ldg_f4:
case NVPTX::BI__nvvm_ldg_d:
diff --git a/clang/test/CodeGen/builtins-nvptx-native-half-type.c b/clang/test/CodeGen/builtins-nvptx-native-half-type.c
index 95021f274cd0f..9dc61d6014210 100644
--- a/clang/test/CodeGen/builtins-nvptx-native-half-type.c
+++ b/clang/test/CodeGen/builtins-nvptx-native-half-type.c
@@ -172,3 +172,12 @@ __device__ void nvvm_min_max_sm86() {
#endif
// CHECK: ret void
}
+
+// CHECK-LABEL: nvvm_ldg_native_half_types
+__device__ void nvvm_ldg_native_half_types(const void *p) {
+ // CHECK: call half @llvm.nvvm.ldg.global.f.f16.p0
+ __nvvm_ldg_h((const __fp16 *)p);
+ typedef __fp16 __fp16v2 __attribute__((ext_vector_type(2)));
+ // CHECK: call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p0
+ __nvvm_ldg_h2((const __fp16v2 *)p);
+}
diff --git a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
index 6d5fcb4cd317e..d40eb7a32027d 100644
--- a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
+++ b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
@@ -4,34 +4,82 @@
declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
+
declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
declare i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align)
-
-; CHECK: func0
-define i8 @func0(ptr addrspace(1) %ptr) {
+; CHECK: test_ldu_i8
+define i8 @test_ldu_i8(ptr addrspace(1) %ptr) {
; ldu.global.u8
%val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
ret i8 %val
}
-; CHECK: func1
-define i32 @func1(ptr addrspace(1) %ptr) {
+; CHECK: test_ldu_i32
+define i32 @test_ldu_i32(ptr addrspace(1) %ptr) {
; ldu.global.u32
%val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
ret i32 %val
}
-; CHECK: func2
-define i8 @func2(ptr addrspace(1) %ptr) {
+; CHECK: test_ldg_i8
+define i8 @test_ldg_i8(ptr addrspace(1) %ptr) {
; ld.global.nc.u8
%val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
ret i8 %val
}
-; CHECK: func3
-define i32 @func3(ptr addrspace(1) %ptr) {
+; CHECK: test_ldg_i16
+define i16 @test_ldg_i16(ptr addrspace(1) %ptr) {
+; ld.global.nc.u16
+ %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 4)
+ ret i16 %val
+}
+
+; CHECK: test_ldg_i32
+define i32 @test_ldg_i32(ptr addrspace(1) %ptr) {
; ld.global.nc.u32
%val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
ret i32 %val
}
+
+; CHECK: test_ldg_i64
+define i64 @test_ldg_i64(ptr addrspace(1) %ptr) {
+; ld.global.nc.u64
+ %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+ ret i64 %val
+}
+
+; CHECK: test_ldg_f32
+define float @test_ldg_f32(ptr addrspace(1) %ptr) {
+; ld.global.nc.u64
+ %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+ ret float %val
+}
+
+; CHECK: test_ldg_f64
+define double @test_ldg_f64(ptr addrspace(1) %ptr) {
+; ld.global.nc.u64
+ %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+ ret double %val
+}
+
+; CHECK: test_ldg_f16
+define half @test_ldg_f16(ptr addrspace(1) %ptr) {
+; ld.global.nc.b16
+ %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 4)
+ ret half %val
+}
+
+; CHECK: test_ldg_v2f16
+define <2 x half> @test_ldg_v2f16(ptr addrspace(1) %ptr) {
+; ld.global.nc.b32
+ %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+ ret <2 x half> %val
+}
More information about the llvm-commits
mailing list