[llvm] [Offload][AMDGPU] Impose more restrictions for implicit kernel arguments (PR #95211)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 12 02:26:29 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-offload
Author: Johannes Doerfert (jdoerfert)
<details>
<summary>Changes</summary>
COV3 is no longer supported, so we can use the `ArgsSize` value read from the kernel to determine how many argument bytes are needed and whether implicit kernel arguments are used.
---
Full diff: https://github.com/llvm/llvm-project/pull/95211.diff
1 File Affected:
- (modified) offload/plugins-nextgen/amdgpu/src/rtl.cpp (+15-12)
``````````diff
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 663cfdc5fdf01..26bca4a3674bd 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -3267,9 +3267,10 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
uint32_t NumThreads, uint64_t NumBlocks,
KernelArgsTy &KernelArgs, void *Args,
AsyncInfoWrapperTy &AsyncInfoWrapper) const {
- const uint32_t KernelArgsSize = KernelArgs.NumArgs * sizeof(void *);
+ const uint32_t LaunchParamsSize = KernelArgs.NumArgs * sizeof(void *);
- if (ArgsSize < KernelArgsSize)
+ if (ArgsSize != LaunchParamsSize &&
+ ArgsSize != LaunchParamsSize + getImplicitArgsSize())
return Plugin::error("Mismatch of kernel arguments size");
AMDGPUPluginTy &AMDGPUPlugin =
@@ -3292,20 +3293,21 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
if (auto Err = GenericDevice.getDeviceStackSize(StackSize))
return Err;
- // Initialize implicit arguments.
- utils::AMDGPUImplicitArgsTy *ImplArgs =
- reinterpret_cast<utils::AMDGPUImplicitArgsTy *>(
- advanceVoidPtr(AllArgs, KernelArgsSize));
+ utils::AMDGPUImplicitArgsTy *ImplArgs = nullptr;
+ if (ArgsSize == LaunchParamsSize + getImplicitArgsSize()) {
+ // Initialize implicit arguments.
+ ImplArgs = reinterpret_cast<utils::AMDGPUImplicitArgsTy *>(
+ advanceVoidPtr(AllArgs, LaunchParamsSize));
- // Initialize the implicit arguments to zero.
- std::memset(ImplArgs, 0, ImplicitArgsSize);
+ // Initialize the implicit arguments to zero.
+ std::memset(ImplArgs, 0, getImplicitArgsSize());
+ }
// Copy the explicit arguments.
// TODO: We should expose the args memory manager alloc to the common part as
// alternative to copying them twice.
- if (KernelArgs.NumArgs)
- std::memcpy(AllArgs, *static_cast<void **>(Args),
- sizeof(void *) * KernelArgs.NumArgs);
+ if (LaunchParamsSize)
+ std::memcpy(AllArgs, *static_cast<void **>(Args), LaunchParamsSize);
AMDGPUDeviceTy &AMDGPUDevice = static_cast<AMDGPUDeviceTy &>(GenericDevice);
@@ -3318,7 +3320,8 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
Stream->setRPCServer(GenericDevice.getRPCServer());
// Only COV5 implicitargs needs to be set. COV4 implicitargs are not used.
- if (getImplicitArgsSize() == sizeof(utils::AMDGPUImplicitArgsTy)) {
+ if (ImplArgs &&
+ getImplicitArgsSize() == sizeof(utils::AMDGPUImplicitArgsTy)) {
ImplArgs->BlockCountX = NumBlocks;
ImplArgs->BlockCountY = 1;
ImplArgs->BlockCountZ = 1;
``````````
</details>
https://github.com/llvm/llvm-project/pull/95211
More information about the llvm-commits mailing list