r261777 - [CUDA] Add conversion operators for threadIdx, blockIdx, gridDim, and blockDim to uint3 and dim3.

Justin Lebar via cfe-commits cfe-commits at lists.llvm.org
Wed Feb 24 13:49:34 PST 2016


Author: jlebar
Date: Wed Feb 24 15:49:33 2016
New Revision: 261777

URL: http://llvm.org/viewvc/llvm-project?rev=261777&view=rev
Log:
[CUDA] Add conversion operators for threadIdx, blockIdx, gridDim, and blockDim to uint3 and dim3.

Summary:
This lets you write, e.g.

  uint3 a = threadIdx;
  uint3 b = blockIdx;
  dim3 c = gridDim;
  dim3 d = blockDim;

which is legal in nvcc, but was not legal in clang.

The fact that e.g. the type of threadIdx is not actually uint3 is still
observable, but now you have to try to observe it.

Reviewers: tra

Subscribers: echristo, cfe-commits

Differential Revision: http://reviews.llvm.org/D17561

Modified:
    cfe/trunk/lib/Headers/__clang_cuda_runtime_wrapper.h
    cfe/trunk/lib/Headers/cuda_builtin_vars.h

Modified: cfe/trunk/lib/Headers/__clang_cuda_runtime_wrapper.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/__clang_cuda_runtime_wrapper.h?rev=261777&r1=261776&r2=261777&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/__clang_cuda_runtime_wrapper.h (original)
+++ cfe/trunk/lib/Headers/__clang_cuda_runtime_wrapper.h Wed Feb 24 15:49:33 2016
@@ -245,6 +245,33 @@ __device__ static inline void *malloc(si
 }
 } // namespace std
 
+// Out-of-line implementations from cuda_builtin_vars.h.  These need to come
+// after we've pulled in the definition of uint3 and dim3.
+
+__device__ inline __cuda_builtin_threadIdx_t::operator uint3() const {
+  uint3 ret;
+  ret.x = x;
+  ret.y = y;
+  ret.z = z;
+  return ret;
+}
+
+__device__ inline __cuda_builtin_blockIdx_t::operator uint3() const {
+  uint3 ret;
+  ret.x = x;
+  ret.y = y;
+  ret.z = z;
+  return ret;
+}
+
+__device__ inline __cuda_builtin_blockDim_t::operator dim3() const {
+  return dim3(x, y, z);
+}
+
+__device__ inline __cuda_builtin_gridDim_t::operator dim3() const {
+  return dim3(x, y, z);
+}
+
 #include <__clang_cuda_cmath.h>
 
 // curand_mtgp32_kernel helpfully redeclares blockDim and threadIdx in host

Modified: cfe/trunk/lib/Headers/cuda_builtin_vars.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/cuda_builtin_vars.h?rev=261777&r1=261776&r2=261777&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/cuda_builtin_vars.h (original)
+++ cfe/trunk/lib/Headers/cuda_builtin_vars.h Wed Feb 24 15:49:33 2016
@@ -24,10 +24,14 @@
 #ifndef __CUDA_BUILTIN_VARS_H
 #define __CUDA_BUILTIN_VARS_H
 
+// Forward declares from vector_types.h.
+struct uint3;
+struct dim3;
+
 // The file implements built-in CUDA variables using __declspec(property).
 // https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx
 // All read accesses of built-in variable fields get converted into calls to a
-// getter function which in turn would call appropriate builtin to fetch the
+// getter function which in turn calls the appropriate builtin to fetch the
 // value.
 //
 // Example:
@@ -63,6 +67,9 @@ struct __cuda_builtin_threadIdx_t {
   __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_tid_x());
   __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_tid_y());
   __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_tid_z());
+  // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
+  // uint3).  This function is defined after we pull in vector_types.h.
+  __attribute__((device)) operator uint3() const;
 private:
   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);
 };
@@ -71,6 +78,9 @@ struct __cuda_builtin_blockIdx_t {
   __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ctaid_x());
   __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ctaid_y());
   __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ctaid_z());
+  // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a
+  // uint3).  This function is defined after we pull in vector_types.h.
+  __attribute__((device)) operator uint3() const;
 private:
   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);
 };
@@ -79,6 +89,9 @@ struct __cuda_builtin_blockDim_t {
   __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ntid_x());
   __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ntid_y());
   __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ntid_z());
+  // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a
+  // dim3).  This function is defined after we pull in vector_types.h.
+  __attribute__((device)) operator dim3() const;
 private:
   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);
 };
@@ -87,6 +100,9 @@ struct __cuda_builtin_gridDim_t {
   __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_nctaid_x());
   __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_nctaid_y());
   __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_nctaid_z());
+  // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a
+  // dim3).  This function is defined after we pull in vector_types.h.
+  __attribute__((device)) operator dim3() const;
 private:
   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);
 };




More information about the cfe-commits mailing list