[llvm] [offload] Add properties parameter to olLaunchKernel (PR #184343)
Łukasz Plewa via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 4 09:51:59 PST 2026
================
@@ -1495,9 +1514,44 @@ Error CUDAKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
MaxDynCGroupMemLimit = MaxDynCGroupMem;
}
- CUresult Res = cuLaunchKernel(Func, NumBlocks[0], NumBlocks[1], NumBlocks[2],
- NumThreads[0], NumThreads[1], NumThreads[2],
- MaxDynCGroupMem, Stream, nullptr, Config);
+ CUresult Res;
+ if (KernelArgs.Flags.Cooperative) {
+ CUDADeviceTy &CUDADevice = static_cast<CUDADeviceTy &>(GenericDevice);
+
+ uint32_t SupportsCooperative = 0;
+ if (auto Err = CUDADevice.getDeviceAttr(
+ CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH, SupportsCooperative))
+ return Err;
+
+ if (!SupportsCooperative) {
+ return Plugin::error(ErrorCode::UNSUPPORTED,
+ "Device does not support cooperative launch");
+ }
+
+ CUlaunchAttribute CoopAttr;
+ CoopAttr.id = CU_LAUNCH_ATTRIBUTE_COOPERATIVE;
+ CoopAttr.value.cooperative = 1;
+
+ CUlaunchConfig LaunchConfig;
+ LaunchConfig.gridDimX = NumBlocks[0];
+ LaunchConfig.gridDimY = NumBlocks[1];
+ LaunchConfig.gridDimZ = NumBlocks[2];
+ LaunchConfig.blockDimX = NumThreads[0];
+ LaunchConfig.blockDimY = NumThreads[1];
+ LaunchConfig.blockDimZ = NumThreads[2];
+ LaunchConfig.sharedMemBytes = MaxDynCGroupMem;
+ LaunchConfig.hStream = Stream;
+ LaunchConfig.attrs = &CoopAttr;
+ LaunchConfig.numAttrs = 1;
+
+ // Launch kernel with config-based arguments
+ Res = cuLaunchKernelEx(&LaunchConfig, Func, nullptr, Config);
----------------
lplewa wrote:
I switched to using only `cuLaunchKernelEx`. I'm not sure whether I should also update any documentation to reflect the change in the minimum required CUDA version.
https://github.com/llvm/llvm-project/pull/184343
More information about the llvm-commits
mailing list