[clang] [llvm] [NVPTX] Add clang builtin for `__nvvm_reflect` intrinsic (PR #81277)
Joseph Huber via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 9 12:10:59 PST 2024
https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/81277
>From 5c9bc83db318d5c8608108942e494d6f0c1a27d5 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Fri, 9 Feb 2024 10:50:20 -0600
Subject: [PATCH] [NVPTX] Add clang builtin for `__nvvm_reflect` intrinsic
Summary:
Some recent support made usage of `__nvvm_reflect` more consistent. We
should expose it as an intrinsic rather than forcing users to externally
define the function.
---
clang/include/clang/Basic/BuiltinsNVPTX.def | 1 +
clang/test/CodeGen/builtins-nvptx.c | 8 ++++++
clang/test/CodeGenOpenCL/reflect.cl | 28 +++++++++++++++++++
llvm/include/llvm/IR/IntrinsicsNVVM.td | 3 +-
.../test/CodeGen/NVPTX/nvvm-reflect-opaque.ll | 4 +--
llvm/test/CodeGen/NVPTX/nvvm-reflect.ll | 4 +--
6 files changed, 43 insertions(+), 5 deletions(-)
create mode 100644 clang/test/CodeGenOpenCL/reflect.cl
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def
index 7819e71d7fe2aa..8d3c5e69d55cf4 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.def
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -159,6 +159,7 @@ BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n")
BUILTIN(__nvvm_prmt, "UiUiUiUi", "")
BUILTIN(__nvvm_exit, "v", "r")
+BUILTIN(__nvvm_reflect, "UicC*", "r")
TARGET_BUILTIN(__nvvm_nanosleep, "vUi", "n", AND(SM_70, PTX63))
// Min Max
diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c
index ad7c27f2d60d26..4dba7670b5c43e 100644
--- a/clang/test/CodeGen/builtins-nvptx.c
+++ b/clang/test/CodeGen/builtins-nvptx.c
@@ -44,6 +44,14 @@ __device__ int read_tid() {
}
+__device__ bool reflect() {
+
+// CHECK: call i32 @llvm.nvvm.reflect(ptr {{.*}})
+
+ unsigned x = __nvvm_reflect("__CUDA_ARCH");
+ return x >= 700;
+}
+
__device__ int read_ntid() {
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
diff --git a/clang/test/CodeGenOpenCL/reflect.cl b/clang/test/CodeGenOpenCL/reflect.cl
new file mode 100644
index 00000000000000..9ae4a5f027d358
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/reflect.cl
@@ -0,0 +1,28 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -emit-llvm -O0 -o - | FileCheck %s
+
+// CHECK-LABEL: define dso_local zeroext i1 @device_function(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.reflect(ptr addrspacecast (ptr addrspace(4) @.str to ptr))
+// CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[TMP0]], 700
+// CHECK-NEXT: ret i1 [[CMP]]
+//
+bool device_function() {
+ return __nvvm_reflect("__CUDA_ARCH") >= 700;
+}
+
+// CHECK-LABEL: define dso_local spir_kernel void @kernel_function(
+// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4
+// CHECK-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @device_function() #[[ATTR3:[0-9]+]]
+// CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[I_ADDR]], align 4
+// CHECK-NEXT: store i32 [[CONV]], ptr addrspace(1) [[TMP0]], align 4
+// CHECK-NEXT: ret void
+//
+__kernel void kernel_function(__global int *i) {
+ *i = device_function();
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index d825dc82156432..726cea004606e2 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1625,7 +1625,8 @@ def int_nvvm_compiler_warn :
Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.warn">;
def int_nvvm_reflect :
- Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty], [IntrNoMem], "llvm.nvvm.reflect">;
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem], "llvm.nvvm.reflect">,
+ ClangBuiltin<"__nvvm_reflect">;
// isspacep.{const, global, local, shared}
def int_nvvm_isspacep_const
diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll
index 1cb5c87fae826b..46ab79d9858cad 100644
--- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll
+++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll
@@ -41,7 +41,7 @@ exit:
ret float %ret
}
-declare i32 @llvm.nvvm.reflect.p0(ptr)
+declare i32 @llvm.nvvm.reflect(ptr)
; CHECK-LABEL: define noundef i32 @intrinsic
define i32 @intrinsic() {
@@ -49,7 +49,7 @@ define i32 @intrinsic() {
; USE_FTZ_0: ret i32 0
; USE_FTZ_1: ret i32 1
%ptr = tail call ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4) @str)
- %reflect = tail call i32 @llvm.nvvm.reflect.p0(ptr %ptr)
+ %reflect = tail call i32 @llvm.nvvm.reflect(ptr %ptr)
ret i32 %reflect
}
diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll
index 9b1939f372082f..2ed9f7c11bcf9b 100644
--- a/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll
+++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll
@@ -41,7 +41,7 @@ exit:
ret float %ret
}
-declare i32 @llvm.nvvm.reflect.p0(ptr)
+declare i32 @llvm.nvvm.reflect(ptr)
; CHECK-LABEL: define noundef i32 @intrinsic
define i32 @intrinsic() {
@@ -49,7 +49,7 @@ define i32 @intrinsic() {
; USE_FTZ_0: ret i32 0
; USE_FTZ_1: ret i32 1
%ptr = tail call ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4) @str)
- %reflect = tail call i32 @llvm.nvvm.reflect.p0(ptr %ptr)
+ %reflect = tail call i32 @llvm.nvvm.reflect(ptr %ptr)
ret i32 %reflect
}
More information about the llvm-commits
mailing list