[clang] f526ee5 - [CUDA] Provide address space conversion builtins.
Artem Belevich via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 12 14:57:10 PDT 2021
Author: Artem Belevich
Date: 2021-10-12T14:56:39-07:00
New Revision: f526ee5b8517b60620cd03bb3e5945ed69d6bfaa
URL: https://github.com/llvm/llvm-project/commit/f526ee5b8517b60620cd03bb3e5945ed69d6bfaa
DIFF: https://github.com/llvm/llvm-project/commit/f526ee5b8517b60620cd03bb3e5945ed69d6bfaa.diff
LOG: [CUDA] Provide address space conversion builtins.
CUDA-11 headers rely on these NVCC builtins.
Despite having the `__nv` prefix, those are *not* provided by libdevice.
Differential Revision: https://reviews.llvm.org/D111665
Added:
Modified:
clang/lib/Headers/__clang_cuda_intrinsics.h
Removed:
################################################################################
diff --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h
index c7bff6a9d8fe..e0875bbcbf4a 100644
--- a/clang/lib/Headers/__clang_cuda_intrinsics.h
+++ b/clang/lib/Headers/__clang_cuda_intrinsics.h
@@ -483,4 +483,36 @@ inline __device__ unsigned __funnelshift_rc(unsigned low32, unsigned high32,
#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320
+#if CUDA_VERSION >= 11000
+extern "C" { // C linkage for the address space conversion helpers defined in this block.
+__device__ inline size_t __nv_cvta_generic_to_global_impl(const void *__ptr) {
+ return (size_t)(void __attribute__((address_space(1))) *)__ptr; // Cast to an address_space(1) pointer ("global" per the function name), then return it as an integer.
+}
+__device__ inline size_t __nv_cvta_generic_to_shared_impl(const void *__ptr) {
+ return (size_t)(void __attribute__((address_space(3))) *)__ptr; // Same pattern with address_space(3) ("shared" per the function name).
+}
+__device__ inline size_t __nv_cvta_generic_to_constant_impl(const void *__ptr) {
+ return (size_t)(void __attribute__((address_space(4))) *)__ptr; // Same pattern with address_space(4) ("constant" per the function name).
+}
+__device__ inline size_t __nv_cvta_generic_to_local_impl(const void *__ptr) {
+ return (size_t)(void __attribute__((address_space(5))) *)__ptr; // Same pattern with address_space(5) ("local" per the function name).
+}
+__device__ inline void *__nv_cvta_global_to_generic_impl(size_t __ptr) {
+ return (void *)(void __attribute__((address_space(1))) *)__ptr; // Reverse direction: treat the integer as an address_space(1) pointer, then cast it to a plain void *.
+}
+__device__ inline void *__nv_cvta_shared_to_generic_impl(size_t __ptr) {
+ return (void *)(void __attribute__((address_space(3))) *)__ptr; // Reverse direction for address_space(3).
+}
+__device__ inline void *__nv_cvta_constant_to_generic_impl(size_t __ptr) {
+ return (void *)(void __attribute__((address_space(4))) *)__ptr; // Reverse direction for address_space(4).
+}
+__device__ inline void *__nv_cvta_local_to_generic_impl(size_t __ptr) {
+ return (void *)(void __attribute__((address_space(5))) *)__ptr; // Reverse direction for address_space(5).
+}
+__device__ inline uint32_t __nvvm_get_smem_pointer(void *__ptr) {
+ return __nv_cvta_generic_to_shared_impl(__ptr); // The size_t result is implicitly converted to uint32_t by the return type (narrowing if size_t is wider). NOTE(review): presumably shared-space addresses fit in 32 bits; confirm.
+}
+} // extern "C"
+#endif // CUDA_VERSION >= 11000
+
#endif // defined(__CLANG_CUDA_INTRINSICS_H__)
More information about the cfe-commits
mailing list