[llvm] [Offload][AMDGPU] Impose more restrictions for implicit kernel arguments (PR #95211)

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 12 02:25:54 PDT 2024


https://github.com/jdoerfert created https://github.com/llvm/llvm-project/pull/95211

COV3 is not supported anymore, thus we can just use ArgsSize we read from the kernel to determine how many argument bytes we need and if implicit kernel arguments are used.

>From 094dbf3e01a62136db6be1afd6194f1d84ca8494 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <johannes at jdoerfert.de>
Date: Wed, 12 Jun 2024 02:14:32 -0700
Subject: [PATCH] [Offload][AMDGPU] Impose more restrictions for implicit
 kernel arguments

COV3 is not supported anymore, thus we can just use ArgsSize we read
from the kernel to determine how many argument bytes we need and if
implicit kernel arguments are used.
---
 offload/plugins-nextgen/amdgpu/src/rtl.cpp | 27 ++++++++++++----------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 663cfdc5fdf01..26bca4a3674bd 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -3267,9 +3267,10 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
                                  uint32_t NumThreads, uint64_t NumBlocks,
                                  KernelArgsTy &KernelArgs, void *Args,
                                  AsyncInfoWrapperTy &AsyncInfoWrapper) const {
-  const uint32_t KernelArgsSize = KernelArgs.NumArgs * sizeof(void *);
+  const uint32_t LaunchParamsSize = KernelArgs.NumArgs * sizeof(void *);
 
-  if (ArgsSize < KernelArgsSize)
+  if (ArgsSize != LaunchParamsSize &&
+      ArgsSize != LaunchParamsSize + getImplicitArgsSize())
     return Plugin::error("Mismatch of kernel arguments size");
 
   AMDGPUPluginTy &AMDGPUPlugin =
@@ -3292,20 +3293,21 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
   if (auto Err = GenericDevice.getDeviceStackSize(StackSize))
     return Err;
 
-  // Initialize implicit arguments.
-  utils::AMDGPUImplicitArgsTy *ImplArgs =
-      reinterpret_cast<utils::AMDGPUImplicitArgsTy *>(
-          advanceVoidPtr(AllArgs, KernelArgsSize));
+  utils::AMDGPUImplicitArgsTy *ImplArgs = nullptr;
+  if (ArgsSize == LaunchParamsSize + getImplicitArgsSize()) {
+    // Initialize implicit arguments.
+    ImplArgs = reinterpret_cast<utils::AMDGPUImplicitArgsTy *>(
+        advanceVoidPtr(AllArgs, LaunchParamsSize));
 
-  // Initialize the implicit arguments to zero.
-  std::memset(ImplArgs, 0, ImplicitArgsSize);
+    // Initialize the implicit arguments to zero.
+    std::memset(ImplArgs, 0, getImplicitArgsSize());
+  }
 
   // Copy the explicit arguments.
   // TODO: We should expose the args memory manager alloc to the common part as
   // 	   alternative to copying them twice.
-  if (KernelArgs.NumArgs)
-    std::memcpy(AllArgs, *static_cast<void **>(Args),
-                sizeof(void *) * KernelArgs.NumArgs);
+  if (LaunchParamsSize)
+    std::memcpy(AllArgs, *static_cast<void **>(Args), LaunchParamsSize);
 
   AMDGPUDeviceTy &AMDGPUDevice = static_cast<AMDGPUDeviceTy &>(GenericDevice);
 
@@ -3318,7 +3320,8 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
     Stream->setRPCServer(GenericDevice.getRPCServer());
 
   // Only COV5 implicitargs needs to be set. COV4 implicitargs are not used.
-  if (getImplicitArgsSize() == sizeof(utils::AMDGPUImplicitArgsTy)) {
+  if (ImplArgs &&
+      getImplicitArgsSize() == sizeof(utils::AMDGPUImplicitArgsTy)) {
     ImplArgs->BlockCountX = NumBlocks;
     ImplArgs->BlockCountY = 1;
     ImplArgs->BlockCountZ = 1;



More information about the llvm-commits mailing list