[clang] f526ee5 - [CUDA] Provide address space conversion builtins.

Tue Oct 12 14:57:10 PDT 2021

Author: Artem Belevich
Date: 2021-10-12T14:56:39-07:00
New Revision: f526ee5b8517b60620cd03bb3e5945ed69d6bfaa

URL: https://github.com/llvm/llvm-project/commit/f526ee5b8517b60620cd03bb3e5945ed69d6bfaa
DIFF: https://github.com/llvm/llvm-project/commit/f526ee5b8517b60620cd03bb3e5945ed69d6bfaa.diff

LOG: [CUDA] Provide address space conversion builtins.

CUDA-11 headers rely on these NVCC builtins.
Despite having `__nv` previx, those are *not* provided by libdevice.

Differential Revision: https://reviews.llvm.org/D111665

Added: 
    

Modified: 
    clang/lib/Headers/__clang_cuda_intrinsics.h

Removed: 
    


################################################################################
diff  --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h
index c7bff6a9d8fe..e0875bbcbf4a 100644

--- a/clang/lib/Headers/__clang_cuda_intrinsics.h
+++ b/clang/lib/Headers/__clang_cuda_intrinsics.h
@@ -483,4 +483,36 @@ inline __device__ unsigned __funnelshift_rc(unsigned low32, unsigned high32,
 
 #endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320
 
+#if CUDA_VERSION >= 11000
+extern "C" {
+__device__ inline size_t __nv_cvta_generic_to_global_impl(const void *__ptr) {
+  return (size_t)(void __attribute__((address_space(1))) *)__ptr;
+}
+__device__ inline size_t __nv_cvta_generic_to_shared_impl(const void *__ptr) {
+  return (size_t)(void __attribute__((address_space(3))) *)__ptr;
+}
+__device__ inline size_t __nv_cvta_generic_to_constant_impl(const void *__ptr) {
+  return (size_t)(void __attribute__((address_space(4))) *)__ptr;
+}
+__device__ inline size_t __nv_cvta_generic_to_local_impl(const void *__ptr) {
+  return (size_t)(void __attribute__((address_space(5))) *)__ptr;
+}
+__device__ inline void *__nv_cvta_global_to_generic_impl(size_t __ptr) {
+  return (void *)(void __attribute__((address_space(1))) *)__ptr;
+}
+__device__ inline void *__nv_cvta_shared_to_generic_impl(size_t __ptr) {
+  return (void *)(void __attribute__((address_space(3))) *)__ptr;
+}
+__device__ inline void *__nv_cvta_constant_to_generic_impl(size_t __ptr) {
+  return (void *)(void __attribute__((address_space(4))) *)__ptr;
+}
+__device__ inline void *__nv_cvta_local_to_generic_impl(size_t __ptr) {
+  return (void *)(void __attribute__((address_space(5))) *)__ptr;
+}
+__device__ inline uint32_t __nvvm_get_smem_pointer(void *__ptr) {
+  return __nv_cvta_generic_to_shared_impl(__ptr);
+}
+} // extern "C"
+#endif // CUDA_VERSION >= 11000
+
 #endif // defined(__CLANG_CUDA_INTRINSICS_H__)