[llvm] Enable .ptr .global .align attributes for kernel attributes for CUDA (PR #114874)

Thu Feb 13 17:29:22 PST 2025

tom91136 wrote:

Hello, 
I've just tested this patch and I'm getting `CUDA_ERROR_INVALID_IMAGE` for any ptr kernel param attribute that isn't `.global` or empty.
I've reduced it to the following PTX, which fails on load and reports `CUDA_ERROR_INVALID_IMAGE`:

```cpp
#include <cstdio>
#include <cstdlib>
#include <cuda.h>
// Evaluates a CUDA driver API expression once. If the result is anything other
// than CUDA_SUCCESS, prints "Error <file>:<line>: <error name>" to stderr and
// terminates the process with the CUresult value as the exit status.
// The argument is parenthesised so compound expressions expand safely, and the
// local is given an unusual name so it cannot capture an identifier used
// inside the checked expression.
#define CUDA_CHECK(call)                                                                                                                   \
  do {                                                                                                                                     \
    if (const CUresult cuda_check_res_ = (call); cuda_check_res_ != CUDA_SUCCESS) {                                                        \
      const char *cuda_check_err_ = nullptr;                                                                                               \
      /* cuGetErrorName fails and yields a null string for unrecognised codes; never hand a null pointer to %s. */                        \
      if (cuGetErrorName(cuda_check_res_, &cuda_check_err_) != CUDA_SUCCESS || cuda_check_err_ == nullptr) {                               \
        cuda_check_err_ = "<unrecognized CUresult>";                                                                                       \
      }                                                                                                                                    \
      std::fprintf(stderr, "Error %s:%d: %s\n", __FILE__, __LINE__, cuda_check_err_);                                                      \
      std::exit(cuda_check_res_);                                                                                                          \
    }                                                                                                                                      \
  } while (0)
int main() {
  CUDA_CHECK(cuInit(0));
  CUdevice dev;
  CUDA_CHECK(cuDeviceGet(&dev, 0));
  CUcontext ctx;
  CUDA_CHECK(cuCtxCreate(&ctx, 0, dev));
  CUmodule m;
  CUfunction f;
  const char *ptx = R"PTX(
.version 7.8
.target sm_89
.address_size 64
.visible .entry kernel(
  .param .u64 .ptr         .align 1 kernel_param_0,
  .param .u64 .ptr .global .align 1 kernel_param_2,
  .param .u64 .ptr .shared .align 1 kernel_param_3
){
  ret;
}
)PTX";
  CUDA_CHECK(cuModuleLoadData(&m, ptx));
  CUDA_CHECK(cuModuleGetFunction(&f, m, "kernel"));
  return 0;
}

```
If we take out the `.shared`, then the module loads. 
Reading the PTX spec, I think attributes like `.shared` should be allowed, so I'm not sure why it's failing.
Before this patch, address space attributes were not appended, so it worked.

https://github.com/llvm/llvm-project/pull/114874