[Openmp-commits] [PATCH] D106751: [OpenMP][Libomptarget] Adding `print_device_info` to RTL and `omptarget`

Senran Zhang via Phabricator via Openmp-commits openmp-commits at lists.llvm.org
Tue Jul 27 21:05:06 PDT 2021


zsrkmyn added inline comments.


================
Comment at: openmp/libomptarget/plugins/cuda/src/rtl.cpp:1187
+    checkResult(
+        cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, Device),
+        "Error returned from cuDeviceGetAttribute\n");
----------------
zsrkmyn wrote:
> jdoerfert wrote:
> > jhuber6 wrote:
> > > zsrkmyn wrote:
> > > > Hi, I just found the macro CU_DEVICE_ATTRIBUTE_GPU_OVERLAP is defined nowhere inside the llvm source tree, leading to compilation failure on machines w/o cuda SDK. Could you help take a look?
> > > Someone needs to add it to `/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h` I'm assuming.
> > Only this one?
> From my build log, there's no matching function for call to the following 2 functinos,
> 
> ```
> cuDeviceGetName
> cuDeviceTotalMem
> ```
> 
> And the following macros are not defined
> 
> ```
> CU_DEVICE_ATTRIBUTE_GPU_OVERLAP
> CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y
> CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z
> CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y
> CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z
> CU_DEVICE_ATTRIBUTE_MAX_PITCH
> CU_DEVICE_ATTRIBUTE_CLOCK_RATE
> CU_DEVICE_ATTRIBUTE_INTEGRATED
> CU_DEVICE_ATTRIBUTE_COMPUTE_MODE
> ```
> 
> Not sure if there are more errors.
My build passed w/ the following patch.

There are quite a lot macros are missing and I'm too lazy to check one by one, so I just add all of them from [1]. I would appreciate a lot if someone could help land it.

[1] https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__DEVICE.html

```
diff --git a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
index 0814db7e9d26..14049e1f7559 100644
--- a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
+++ b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
@@ -46,9 +46,132 @@ typedef enum CUlimit_enum {
 } CUlimit;
 
 typedef enum CUdevice_attribute_enum {
+  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
   CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
+  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
+  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
   CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
+  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
+  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,
+  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,
+  CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,
+  CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
   CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
+  CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
+  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
+  CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,
+  CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
+  CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
+  CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
+  CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
+  CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
+  CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
+  CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
+  CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29,
+  CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,
+  CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
+  CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,
+  CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,
+  CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,
+  CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35,
+  CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36,
+  CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37,
+  CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38,
+  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
+  CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
+  CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43,
+  CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49,
+  CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50,
+  CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74,
+  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
+  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77,
+  CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78,
+  CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79,
+  CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80,
+  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81,
+  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82,
+  CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83,
+  CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84,
+  CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85,
+  CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86,
+  CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87,
+  CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88,
+  CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89,
+  CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90,
+  CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91,
+  CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS = 92,
+  CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 93,
+  CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 94,
+  CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95,
+  CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96,
+  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97,
+  CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = 98,
+  CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = 99,
+  CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100,
+  CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101,
+  CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED = 102,
+  CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED = 102,
+  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103,
+  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104,
+  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105,
+  CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = 106,
+  CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED = 107,
+  CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE = 108,
+  CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE = 109,
+  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED = 110,
+  CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK = 111,
+  CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED = 112,
+  CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED = 113,
+  CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED = 114,
+  CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = 115,
+  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED = 116,
+  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS = 117,
+  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING = 118,
+  CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES = 119,
+  CU_DEVICE_ATTRIBUTE_MAX,
 } CUdevice_attribute;
 
 typedef enum CUfunction_attribute_enum {
@@ -66,6 +189,12 @@ typedef enum CUmemAttach_flags_enum {
   CU_MEM_ATTACH_SINGLE = 0x4,
 } CUmemAttach_flags;
 
+typedef enum CUcomputeMode_enum {
+  CU_COMPUTEMODE_DEFAULT = 0,
+  CU_COMPUTEMODE_PROHIBITED = 2,
+  CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3,
+} CUcompute_mode;
+
 CUresult cuCtxGetDevice(CUdevice *);
 CUresult cuDeviceGet(CUdevice *, int);
 CUresult cuDeviceGetAttribute(int *, CUdevice_attribute, CUdevice);
@@ -73,8 +202,8 @@ CUresult cuDeviceGetCount(int *);
 CUresult cuFuncGetAttribute(int *, CUfunction_attribute, CUfunction);
 
 // Device info
-CUresult cuDeviceGetName(char *, int, CUdevice *);
-CUresult cuDeviceTotalMem(size_t *, CUdevice *);
+CUresult cuDeviceGetName(char *, int, CUdevice);
+CUresult cuDeviceTotalMem(size_t *, CUdevice);
 CUresult cuDriverGetVersion(int *);
 
 CUresult cuGetErrorString(CUresult, const char **);
```


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106751/new/

https://reviews.llvm.org/D106751



More information about the Openmp-commits mailing list