[Openmp-commits] [openmp] 3820d0e - [OpenMP][FIX] Runtime args are not kernel args

Johannes Doerfert via Openmp-commits openmp-commits at lists.llvm.org
Sat Jan 21 13:46:21 PST 2023


Author: Johannes Doerfert
Date: 2023-01-21T13:43:10-08:00
New Revision: 3820d0eaaf4ecb557cbb260e34bf5a9eeb51e0e7

URL: https://github.com/llvm/llvm-project/commit/3820d0eaaf4ecb557cbb260e34bf5a9eeb51e0e7
DIFF: https://github.com/llvm/llvm-project/commit/3820d0eaaf4ecb557cbb260e34bf5a9eeb51e0e7.diff

LOG: [OpenMP][FIX] Runtime args are not kernel args

Clang passes `KernelArgs.NumArgs` to the runtime but not all are kernel
arguments. This ensures we fallback to the old logic. In a follow up we
should introduce a new `KernelArgs.NumKernelArgs` field and set it in
the runtime.

Added: 
    

Modified: 
    openmp/libomptarget/include/omptarget.h
    openmp/libomptarget/src/omptarget.cpp
    openmp/libomptarget/src/private.h

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h
index 4ea30802ef91..9df9e2298236 100644
--- a/openmp/libomptarget/include/omptarget.h
+++ b/openmp/libomptarget/include/omptarget.h
@@ -137,7 +137,7 @@ static_assert(sizeof(KernelArgsTy().Flags) == sizeof(uint64_t),
               "Invalid struct size");
 static_assert(sizeof(KernelArgsTy) == (8 * sizeof(int32_t) + 3 * sizeof(int64_t) + 4 * sizeof(void**) + 2 * sizeof(int64_t*)),
               "Invalid struct size");
-constexpr KernelArgsTy CTorDTorKernelArgs = {1,       0,       nullptr,   nullptr,
+inline KernelArgsTy CTorDTorKernelArgs = {1,       0,       nullptr,   nullptr,
 	     nullptr, nullptr, nullptr,   nullptr,
 	     0,      {0,0},       {1, 0, 0}, {1, 0, 0}, 0};
 

diff  --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index a3b923277b82..f3a570f89692 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -1619,7 +1619,7 @@ static int processDataAfter(ident_t *Loc, int64_t DeviceId, void *HostPtr,
 /// returns 0 if it was able to transfer the execution to a target and an
 /// integer 
diff erent from zero otherwise.
 int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
-           const KernelArgsTy &KernelArgs, AsyncInfoTy &AsyncInfo) {
+           KernelArgsTy &KernelArgs, AsyncInfoTy &AsyncInfo) {
   int32_t DeviceId = Device.DeviceID;
   TableMap *TM = getTableMap(HostPtr);
   // No map for this host pointer found!
@@ -1655,10 +1655,11 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
 
   PrivateArgumentManagerTy PrivateArgumentManager(Device, AsyncInfo);
 
+  int NumClangLaunchArgs = KernelArgs.NumArgs;
   int Ret = OFFLOAD_SUCCESS;
-  if (KernelArgs.NumArgs) {
+  if (NumClangLaunchArgs) {
     // Process data, such as data mapping, before launching the kernel
-    Ret = processDataBefore(Loc, DeviceId, HostPtr, KernelArgs.NumArgs,
+    Ret = processDataBefore(Loc, DeviceId, HostPtr, NumClangLaunchArgs,
                             KernelArgs.ArgBasePtrs, KernelArgs.ArgPtrs,
                             KernelArgs.ArgSizes, KernelArgs.ArgTypes,
                             KernelArgs.ArgNames, KernelArgs.ArgMappers, TgtArgs,
@@ -1667,6 +1668,12 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
       REPORT("Failed to process data before launching the kernel.\n");
       return OFFLOAD_FAIL;
     }
+
+    // Clang might pass more values via the ArgPtrs to the runtime that we pass
+    // on to the kernel.
+    // TOOD: Next time we adjust the KernelArgsTy we should introduce a new
+    // NumKernelArgs field.
+    KernelArgs.NumArgs = TgtArgs.size();
   }
 
   // Launch device execution.
@@ -1675,6 +1682,7 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
      TargetTable->EntriesBegin[TM->Index].name, DPxPTR(TgtEntryPtr), TM->Index);
 
   {
+    assert(KernelArgs.NumArgs == TgtArgs.size() && "Argument count mismatch!");
     TIMESCOPE_WITH_NAME_AND_IDENT("Initiate Kernel Launch", Loc);
     Ret = Device.launchKernel(TgtEntryPtr, TgtArgs.data(), TgtOffsets.data(),
                               KernelArgs, AsyncInfo);
@@ -1685,10 +1693,10 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
     return OFFLOAD_FAIL;
   }
 
-  if (KernelArgs.NumArgs) {
+  if (NumClangLaunchArgs) {
     // Transfer data back and deallocate target memory for (first-)private
     // variables
-    Ret = processDataAfter(Loc, DeviceId, HostPtr, KernelArgs.NumArgs,
+    Ret = processDataAfter(Loc, DeviceId, HostPtr, NumClangLaunchArgs,
                            KernelArgs.ArgBasePtrs, KernelArgs.ArgPtrs,
                            KernelArgs.ArgSizes, KernelArgs.ArgTypes,
                            KernelArgs.ArgNames, KernelArgs.ArgMappers,

diff  --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h
index 0277966c954e..9f156192e103 100644
--- a/openmp/libomptarget/src/private.h
+++ b/openmp/libomptarget/src/private.h
@@ -39,7 +39,7 @@ extern int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
                             bool FromMapper = false);
 
 extern int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
-                  const KernelArgsTy &KernelArgs, AsyncInfoTy &AsyncInfo);
+                  KernelArgsTy &KernelArgs, AsyncInfoTy &AsyncInfo);
 
 extern int target_replay(ident_t *Loc, DeviceTy &Device, void *HostPtr,
                          void *DeviceMemory, int64_t DeviceMemorySize,


        


More information about the Openmp-commits mailing list