[clang] [llvm] [NVPTX] Add clang builtin for `__nvvm_reflect` intrinsic (PR #81277)

Joseph Huber via cfe-commits cfe-commits at lists.llvm.org
Fri Feb 9 11:26:44 PST 2024


https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/81277

>From 7b97388a5f251684cf4ae69c3b0cae0ff6fe1397 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Fri, 9 Feb 2024 10:50:20 -0600
Subject: [PATCH 1/2] [NVPTX] Add clang builtin for `__nvvm_reflect` intrinsic

Summary:
Some recent support made usage of `__nvvm_reflect` more consistent. We
should expose it as an intrinsic rather than forcing users to externally
define the function.
---
 clang/include/clang/Basic/BuiltinsNVPTX.def | 1 +
 clang/test/CodeGen/builtins-nvptx.c         | 8 ++++++++
 llvm/include/llvm/IR/IntrinsicsNVVM.td      | 5 +++--
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def
index 7819e71d7fe2aa..8d3c5e69d55cf4 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.def
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -159,6 +159,7 @@ BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n")
 
 BUILTIN(__nvvm_prmt, "UiUiUiUi", "")
 BUILTIN(__nvvm_exit, "v", "r")
+BUILTIN(__nvvm_reflect, "UicC*", "r")
 TARGET_BUILTIN(__nvvm_nanosleep, "vUi", "n", AND(SM_70, PTX63))
 
 // Min Max
diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c
index ad7c27f2d60d26..4dba7670b5c43e 100644
--- a/clang/test/CodeGen/builtins-nvptx.c
+++ b/clang/test/CodeGen/builtins-nvptx.c
@@ -44,6 +44,14 @@ __device__ int read_tid() {
 
 }
 
+__device__ bool reflect() {
+
+// CHECK: call i32 @llvm.nvvm.reflect(ptr {{.*}})
+
+  unsigned x = __nvvm_reflect("__CUDA_ARCH");
+  return x >= 700;
+}
+
 __device__ int read_ntid() {
 
 // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index d825dc82156432..f7b0fe926959b1 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1624,8 +1624,9 @@ def int_nvvm_compiler_error :
 def int_nvvm_compiler_warn :
     Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.warn">;
 
-def int_nvvm_reflect :
-  Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty], [IntrNoMem], "llvm.nvvm.reflect">;
+def int_nvvm_reflect : 
+  Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem], "llvm.nvvm.reflect">,
+  ClangBuiltin<"__nvvm_reflect">;
 
 // isspacep.{const, global, local, shared}
 def int_nvvm_isspacep_const

>From d36b7484f67a617629d021da8eb4e80e625eb2a2 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Fri, 9 Feb 2024 13:26:21 -0600
Subject: [PATCH 2/2] OpenCL test and fix tests

---
 clang/test/CodeGenOpenCL/reflect.cl           | 28 +++++++++++++++++++
 .../test/CodeGen/NVPTX/nvvm-reflect-opaque.ll |  4 +--
 llvm/test/CodeGen/NVPTX/nvvm-reflect.ll       |  4 +--
 3 files changed, 32 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/CodeGenOpenCL/reflect.cl

diff --git a/clang/test/CodeGenOpenCL/reflect.cl b/clang/test/CodeGenOpenCL/reflect.cl
new file mode 100644
index 00000000000000..9ae4a5f027d358
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/reflect.cl
@@ -0,0 +1,28 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -emit-llvm -O0 -o - | FileCheck %s
+
+// CHECK-LABEL: define dso_local zeroext i1 @device_function(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.nvvm.reflect(ptr addrspacecast (ptr addrspace(4) @.str to ptr))
+// CHECK-NEXT:    [[CMP:%.*]] = icmp uge i32 [[TMP0]], 700
+// CHECK-NEXT:    ret i1 [[CMP]]
+//
+bool device_function() {
+  return __nvvm_reflect("__CUDA_ARCH") >= 700;
+}
+
+// CHECK-LABEL: define dso_local spir_kernel void @kernel_function(
+// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4
+// CHECK-NEXT:    store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[CALL:%.*]] = call zeroext i1 @device_function() #[[ATTR3:[0-9]+]]
+// CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CALL]] to i32
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[CONV]], ptr addrspace(1) [[TMP0]], align 4
+// CHECK-NEXT:    ret void
+//
+__kernel void kernel_function(__global int *i) {
+  *i = device_function();
+}
diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll
index 1cb5c87fae826b..46ab79d9858cad 100644
--- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll
+++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll
@@ -41,7 +41,7 @@ exit:
   ret float %ret
 }
 
-declare i32 @llvm.nvvm.reflect.p0(ptr)
+declare i32 @llvm.nvvm.reflect(ptr)
 
 ; CHECK-LABEL: define noundef i32 @intrinsic
 define i32 @intrinsic() {
@@ -49,7 +49,7 @@ define i32 @intrinsic() {
 ; USE_FTZ_0: ret i32 0
 ; USE_FTZ_1: ret i32 1
   %ptr = tail call ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4) @str)
-  %reflect = tail call i32 @llvm.nvvm.reflect.p0(ptr %ptr)
+  %reflect = tail call i32 @llvm.nvvm.reflect(ptr %ptr)
   ret i32 %reflect
 }
 
diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll
index 9b1939f372082f..2ed9f7c11bcf9b 100644
--- a/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll
+++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll
@@ -41,7 +41,7 @@ exit:
   ret float %ret
 }
 
-declare i32 @llvm.nvvm.reflect.p0(ptr)
+declare i32 @llvm.nvvm.reflect(ptr)
 
 ; CHECK-LABEL: define noundef i32 @intrinsic
 define i32 @intrinsic() {
@@ -49,7 +49,7 @@ define i32 @intrinsic() {
 ; USE_FTZ_0: ret i32 0
 ; USE_FTZ_1: ret i32 1
   %ptr = tail call ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4) @str)
-  %reflect = tail call i32 @llvm.nvvm.reflect.p0(ptr %ptr)
+  %reflect = tail call i32 @llvm.nvvm.reflect(ptr %ptr)
   ret i32 %reflect
 }
 



More information about the cfe-commits mailing list