[llvm] [Offload] Use newer CUDA API functions when dynamically loaded (PR #93057)

Wed May 22 08:51:40 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-offload

Author: Joseph Huber (jhuber6)

<details>
<summary>Changes</summary>

Summary:
CUDA does its versioning by putting a redirection in the header so the
API function names remain the same while the underlying symbol changes.
These redirections weren't being applied to some functions that required
them in the dynamic CUDA version.

These functions have newer versions that should be used. The newer versions
are fairly old as far as I'm aware, so we should be able to sweep backward
compatibility under the rug.


---
Full diff: https://github.com/llvm/llvm-project/pull/93057.diff


2 Files Affected:

- (modified) offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp (+1) 
- (modified) offload/plugins-nextgen/cuda/dynamic_cuda/cuda.h (+10) 


``````````diff

diff --git a/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp b/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp
index 5ec3adb9e4e3a..25e50d9fdf9af 100644
--- a/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp
+++ b/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp
@@ -42,6 +42,7 @@ DLWRAP(cuLaunchKernel, 11)
 
 DLWRAP(cuMemAlloc, 2)
 DLWRAP(cuMemAllocHost, 2)
+DLWRAP(cuCtxGetApiVersion, 2)
 DLWRAP(cuMemAllocManaged, 3)
 DLWRAP(cuMemAllocAsync, 3)
 
diff --git a/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.h b/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.h
index 32031c28f8797..5bae3539b793a 100644
--- a/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.h
+++ b/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.h
@@ -16,6 +16,15 @@
 #include <cstddef>
 #include <cstdint>
 
+#define cuDeviceTotalMem                    cuDeviceTotalMem_v2
+#define cuModuleGetGlobal                   cuModuleGetGlobal_v2
+#define cuMemGetInfo                        cuMemGetInfo_v2
+#define cuMemAlloc                          cuMemAlloc_v2
+#define cuMemFree                           cuMemFree_v2
+#define cuMemAllocHost                      cuMemAllocHost_v2
+#define cuDevicePrimaryCtxRelease           cuDevicePrimaryCtxRelease_v2
+#define cuDevicePrimaryCtxSetFlags          cuDevicePrimaryCtxSetFlags_v2
+
 typedef int CUdevice;
 typedef uintptr_t CUdeviceptr;
 typedef struct CUmod_st *CUmodule;
@@ -292,6 +301,7 @@ CUresult cuLaunchKernel(CUfunction, unsigned, unsigned, unsigned, unsigned,
 
 CUresult cuMemAlloc(CUdeviceptr *, size_t);
 CUresult cuMemAllocHost(void **, size_t);
+CUresult cuCtxGetApiVersion(CUcontext ctx, unsigned int *version);
 CUresult cuMemAllocManaged(CUdeviceptr *, size_t, unsigned int);
 CUresult cuMemAllocAsync(CUdeviceptr *, size_t, CUstream);
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/93057