[clang] 30514f0 - [CUDA] Added conversion functions to builtin vars.

Thu Sep 24 14:33:44 PDT 2020

Author: Artem Belevich
Date: 2020-09-24T14:33:04-07:00
New Revision: 30514f0afa3ee1e6da6bf9c41e83c28e884f0740

URL: https://github.com/llvm/llvm-project/commit/30514f0afa3ee1e6da6bf9c41e83c28e884f0740
DIFF: https://github.com/llvm/llvm-project/commit/30514f0afa3ee1e6da6bf9c41e83c28e884f0740.diff

LOG: [CUDA] Added conversion functions to builtin vars.

This is needed to compile some headers in CUDA-11 that assume that threadIdx is
implicitly convertible to dim3. With NVCC, threadIdx is uint3 and there's
dim3(uint3) constructor. Clang uses a special type for the builtin variables, so
that path does not work. Instead, this patch adds conversion function to the
builtin variable classes. that will allow them to be converted to dim3 and uint3.

Differential Revision: https://reviews.llvm.org/D88250

Added: 
    

Modified: 
    clang/lib/Headers/__clang_cuda_builtin_vars.h
    clang/lib/Headers/__clang_cuda_runtime_wrapper.h

Removed: 
    


################################################################################
diff  --git a/clang/lib/Headers/__clang_cuda_builtin_vars.h b/clang/lib/Headers/__clang_cuda_builtin_vars.h
index 2ba1521f2580..412e823a827f 100644

--- a/clang/lib/Headers/__clang_cuda_builtin_vars.h
+++ b/clang/lib/Headers/__clang_cuda_builtin_vars.h
@@ -55,7 +55,9 @@ struct __cuda_builtin_threadIdx_t {
   __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z());
   // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
   // uint3).  This function is defined after we pull in vector_types.h.
+  __attribute__((device)) operator dim3() const;
   __attribute__((device)) operator uint3() const;
+
 private:
   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);
 };
@@ -66,7 +68,9 @@ struct __cuda_builtin_blockIdx_t {
   __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z());
   // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a
   // uint3).  This function is defined after we pull in vector_types.h.
+  __attribute__((device)) operator dim3() const;
   __attribute__((device)) operator uint3() const;
+
 private:
   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);
 };
@@ -78,6 +82,8 @@ struct __cuda_builtin_blockDim_t {
   // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a
   // dim3).  This function is defined after we pull in vector_types.h.
   __attribute__((device)) operator dim3() const;
+  __attribute__((device)) operator uint3() const;
+
 private:
   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);
 };
@@ -89,6 +95,8 @@ struct __cuda_builtin_gridDim_t {
   // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a
   // dim3).  This function is defined after we pull in vector_types.h.
   __attribute__((device)) operator dim3() const;
+  __attribute__((device)) operator uint3() const;
+
 private:
   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);
 };
@@ -108,5 +116,6 @@ __attribute__((device)) const int warpSize = 32;
 #undef __CUDA_DEVICE_BUILTIN
 #undef __CUDA_BUILTIN_VAR
 #undef __CUDA_DISALLOW_BUILTINVAR_ACCESS
+#undef __DELETE
 
 #endif /* __CUDA_BUILTIN_VARS_H */

diff  --git a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
index f43ed55de489..f88c39a9b6e5 100644
--- a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
+++ b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
@@ -377,30 +377,38 @@ __device__ static inline void *malloc(size_t __size) {
 // Out-of-line implementations from __clang_cuda_builtin_vars.h.  These need to
 // come after we've pulled in the definition of uint3 and dim3.
 
+__device__ inline __cuda_builtin_threadIdx_t::operator dim3() const {
+  return dim3(x, y, z);
+}
+
 __device__ inline __cuda_builtin_threadIdx_t::operator uint3() const {
-  uint3 ret;
-  ret.x = x;
-  ret.y = y;
-  ret.z = z;
-  return ret;
+  return {x, y, z};
+}
+
+__device__ inline __cuda_builtin_blockIdx_t::operator dim3() const {
+  return dim3(x, y, z);
 }
 
 __device__ inline __cuda_builtin_blockIdx_t::operator uint3() const {
-  uint3 ret;
-  ret.x = x;
-  ret.y = y;
-  ret.z = z;
-  return ret;
+  return {x, y, z};
 }
 
 __device__ inline __cuda_builtin_blockDim_t::operator dim3() const {
   return dim3(x, y, z);
 }
 
+__device__ inline __cuda_builtin_blockDim_t::operator uint3() const {
+  return {x, y, z};
+}
+
 __device__ inline __cuda_builtin_gridDim_t::operator dim3() const {
   return dim3(x, y, z);
 }
 
+__device__ inline __cuda_builtin_gridDim_t::operator uint3() const {
+  return {x, y, z};
+}
+
 #include <__clang_cuda_cmath.h>
 #include <__clang_cuda_intrinsics.h>
 #include <__clang_cuda_complex_builtins.h>