[Openmp-commits] [openmp] 3820d0e - [OpenMP][FIX] Runtime args are not kernel args
Johannes Doerfert via Openmp-commits
openmp-commits at lists.llvm.org
Sat Jan 21 13:46:21 PST 2023
Author: Johannes Doerfert
Date: 2023-01-21T13:43:10-08:00
New Revision: 3820d0eaaf4ecb557cbb260e34bf5a9eeb51e0e7
URL: https://github.com/llvm/llvm-project/commit/3820d0eaaf4ecb557cbb260e34bf5a9eeb51e0e7
DIFF: https://github.com/llvm/llvm-project/commit/3820d0eaaf4ecb557cbb260e34bf5a9eeb51e0e7.diff
LOG: [OpenMP][FIX] Runtime args are not kernel args
Clang passes `KernelArgs.NumArgs` to the runtime but not all are kernel
arguments. This ensures we fallback to the old logic. In a follow up we
should introduce a new `KernelArgs.NumKernelArgs` field and set it in
the runtime.
Added:
Modified:
openmp/libomptarget/include/omptarget.h
openmp/libomptarget/src/omptarget.cpp
openmp/libomptarget/src/private.h
Removed:
################################################################################
diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h
index 4ea30802ef91..9df9e2298236 100644
--- a/openmp/libomptarget/include/omptarget.h
+++ b/openmp/libomptarget/include/omptarget.h
@@ -137,7 +137,7 @@ static_assert(sizeof(KernelArgsTy().Flags) == sizeof(uint64_t),
"Invalid struct size");
static_assert(sizeof(KernelArgsTy) == (8 * sizeof(int32_t) + 3 * sizeof(int64_t) + 4 * sizeof(void**) + 2 * sizeof(int64_t*)),
"Invalid struct size");
-constexpr KernelArgsTy CTorDTorKernelArgs = {1, 0, nullptr, nullptr,
+inline KernelArgsTy CTorDTorKernelArgs = {1, 0, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr,
0, {0,0}, {1, 0, 0}, {1, 0, 0}, 0};
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index a3b923277b82..f3a570f89692 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -1619,7 +1619,7 @@ static int processDataAfter(ident_t *Loc, int64_t DeviceId, void *HostPtr,
/// returns 0 if it was able to transfer the execution to a target and an
/// integer
diff erent from zero otherwise.
int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
- const KernelArgsTy &KernelArgs, AsyncInfoTy &AsyncInfo) {
+ KernelArgsTy &KernelArgs, AsyncInfoTy &AsyncInfo) {
int32_t DeviceId = Device.DeviceID;
TableMap *TM = getTableMap(HostPtr);
// No map for this host pointer found!
@@ -1655,10 +1655,11 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
PrivateArgumentManagerTy PrivateArgumentManager(Device, AsyncInfo);
+ int NumClangLaunchArgs = KernelArgs.NumArgs;
int Ret = OFFLOAD_SUCCESS;
- if (KernelArgs.NumArgs) {
+ if (NumClangLaunchArgs) {
// Process data, such as data mapping, before launching the kernel
- Ret = processDataBefore(Loc, DeviceId, HostPtr, KernelArgs.NumArgs,
+ Ret = processDataBefore(Loc, DeviceId, HostPtr, NumClangLaunchArgs,
KernelArgs.ArgBasePtrs, KernelArgs.ArgPtrs,
KernelArgs.ArgSizes, KernelArgs.ArgTypes,
KernelArgs.ArgNames, KernelArgs.ArgMappers, TgtArgs,
@@ -1667,6 +1668,12 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
REPORT("Failed to process data before launching the kernel.\n");
return OFFLOAD_FAIL;
}
+
+ // Clang might pass more values via the ArgPtrs to the runtime that we pass
+ // on to the kernel.
+ // TOOD: Next time we adjust the KernelArgsTy we should introduce a new
+ // NumKernelArgs field.
+ KernelArgs.NumArgs = TgtArgs.size();
}
// Launch device execution.
@@ -1675,6 +1682,7 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
TargetTable->EntriesBegin[TM->Index].name, DPxPTR(TgtEntryPtr), TM->Index);
{
+ assert(KernelArgs.NumArgs == TgtArgs.size() && "Argument count mismatch!");
TIMESCOPE_WITH_NAME_AND_IDENT("Initiate Kernel Launch", Loc);
Ret = Device.launchKernel(TgtEntryPtr, TgtArgs.data(), TgtOffsets.data(),
KernelArgs, AsyncInfo);
@@ -1685,10 +1693,10 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
return OFFLOAD_FAIL;
}
- if (KernelArgs.NumArgs) {
+ if (NumClangLaunchArgs) {
// Transfer data back and deallocate target memory for (first-)private
// variables
- Ret = processDataAfter(Loc, DeviceId, HostPtr, KernelArgs.NumArgs,
+ Ret = processDataAfter(Loc, DeviceId, HostPtr, NumClangLaunchArgs,
KernelArgs.ArgBasePtrs, KernelArgs.ArgPtrs,
KernelArgs.ArgSizes, KernelArgs.ArgTypes,
KernelArgs.ArgNames, KernelArgs.ArgMappers,
diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h
index 0277966c954e..9f156192e103 100644
--- a/openmp/libomptarget/src/private.h
+++ b/openmp/libomptarget/src/private.h
@@ -39,7 +39,7 @@ extern int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
bool FromMapper = false);
extern int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
- const KernelArgsTy &KernelArgs, AsyncInfoTy &AsyncInfo);
+ KernelArgsTy &KernelArgs, AsyncInfoTy &AsyncInfo);
extern int target_replay(ident_t *Loc, DeviceTy &Device, void *HostPtr,
void *DeviceMemory, int64_t DeviceMemorySize,
More information about the Openmp-commits
mailing list