[Openmp-commits] [openmp] ad23e4d - [Libomptarget] Implement a unified kernel entry function

Joseph Huber via Openmp-commits openmp-commits at lists.llvm.org
Fri Jul 8 11:44:32 PDT 2022


Author: Joseph Huber
Date: 2022-07-08T14:44:06-04:00
New Revision: ad23e4d85fb39e99ff61f588bad480b824d9d1df

URL: https://github.com/llvm/llvm-project/commit/ad23e4d85fb39e99ff61f588bad480b824d9d1df
DIFF: https://github.com/llvm/llvm-project/commit/ad23e4d85fb39e99ff61f588bad480b824d9d1df.diff

LOG: [Libomptarget] Implement a unified kernel entry function

This patch implements a unified kernel entry function that will be
called for target regions both with and without a teams clause. We
introduce a new interface and make the old functions forward to it. A
follow-up patch will include the necessary changes to Clang to call
these new functions instead.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D128549

Added: 
    

Modified: 
    openmp/libomptarget/include/omptarget.h
    openmp/libomptarget/src/exports
    openmp/libomptarget/src/interface.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h
index 957ea725f16b8..6651cc8787b2d 100644
--- a/openmp/libomptarget/include/omptarget.h
+++ b/openmp/libomptarget/include/omptarget.h
@@ -109,6 +109,18 @@ enum TargetAllocTy : int32_t {
   TARGET_ALLOC_DEFAULT
 };
 
+/// This struct contains all of the arguments to a target kernel region launch.
+struct __tgt_kernel_arguments {
+  int32_t Version;    // Version of this struct for ABI compatibility.
+  int32_t NumArgs;    // Number of arguments in each input pointer.
+  void **ArgBasePtrs; // Base pointer of each argument (e.g. a struct).
+  void **ArgPtrs;     // Pointer to the argument data.
+  int64_t *ArgSizes;  // Size of the argument data in bytes.
+  int64_t *ArgTypes;  // Type of the data (e.g. to / from).
+  void **ArgNames;    // Name of the data for debugging, possibly null.
+  void **ArgMappers;  // User-defined mappers, possibly null.
+};
+
 /// This struct is a record of an entry point or global. For a function
 /// entry point the size is expected to be zero
 struct __tgt_offload_entry {
@@ -345,11 +357,19 @@ int __tgt_target_teams_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr,
                               map_var_info_t *ArgNames, void **ArgMappers,
                               int32_t NumTeams, int32_t ThreadLimit);
 int __tgt_target_teams_nowait_mapper(
-    ident_t *Loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum,
-    void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
-    map_var_info_t *ArgNames, void **ArgMappers, int32_t NumTeams,
-    int32_t ThreadLimit, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
-    void *NoAliasDepList);
+    ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num,
+    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
+    map_var_info_t *arg_names, void **arg_mappers, int32_t num_teams,
+    int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum,
+    void *noAliasDepList);
+int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
+                        int32_t ThreadLimit, void *HostPtr,
+                        __tgt_kernel_arguments *Args);
+int __tgt_target_kernel_nowait(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
+                               int32_t ThreadLimit, void *HostPtr,
+                               __tgt_kernel_arguments *Args, int32_t DepNum,
+                               void *DepList, int32_t NoAliasDepNum,
+                               void *NoAliasDepList);
 
 void __kmpc_push_target_tripcount(int64_t DeviceId, uint64_t LoopTripcount);
 

diff  --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports
index fe27885dfb23e..35a665a98c27a 100644
--- a/openmp/libomptarget/src/exports
+++ b/openmp/libomptarget/src/exports
@@ -24,6 +24,8 @@ VERS1.0 {
     __tgt_target_data_update_nowait_mapper;
     __tgt_target_nowait_mapper;
     __tgt_target_teams_nowait_mapper;
+    __tgt_target_kernel;
+    __tgt_target_kernel_nowait;
     __tgt_mapper_num_components;
     __tgt_push_mapper_component;
     __kmpc_push_target_tripcount;

diff  --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp
index b21a8aa31334e..b27d5dfccbf76 100644
--- a/openmp/libomptarget/src/interface.cpp
+++ b/openmp/libomptarget/src/interface.cpp
@@ -278,36 +278,9 @@ EXTERN int __tgt_target_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr,
                                int64_t *ArgSizes, int64_t *ArgTypes,
                                map_var_info_t *ArgNames, void **ArgMappers) {
   TIMESCOPE_WITH_IDENT(Loc);
-  DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64
-     "\n",
-     DPxPTR(HostPtr), DeviceId);
-  if (checkDeviceAndCtors(DeviceId, Loc)) {
-    DP("Not offloading to device %" PRId64 "\n", DeviceId);
-    return OMP_TGT_FAIL;
-  }
-
-  if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
-    printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
-                         "Entering OpenMP kernel");
-#ifdef OMPTARGET_DEBUG
-  for (int I = 0; I < ArgNum; ++I) {
-    DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
-       ", Type=0x%" PRIx64 ", Name=%s\n",
-       I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
-       (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
-  }
-#endif
-
-  DeviceTy &Device = *PM->Devices[DeviceId];
-  AsyncInfoTy AsyncInfo(Device);
-  int Rc =
-      target(Loc, Device, HostPtr, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes,
-             ArgNames, ArgMappers, 0, 0, false /*team*/, AsyncInfo);
-  if (Rc == OFFLOAD_SUCCESS)
-    Rc = AsyncInfo.synchronize();
-  handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
-  assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_mapper unexpected failure!");
-  return OMP_TGT_SUCCESS;
+  __tgt_kernel_arguments KernelArgs{1,        ArgNum,   ArgsBase, Args,
+                                    ArgSizes, ArgTypes, ArgNames, ArgMappers};
+  return __tgt_target_kernel(Loc, DeviceId, -1, 0, HostPtr, &KernelArgs);
 }
 
 EXTERN int __tgt_target_nowait_mapper(
@@ -352,50 +325,78 @@ EXTERN int __tgt_target_teams_mapper(ident_t *Loc, int64_t DeviceId,
                                      map_var_info_t *ArgNames,
                                      void **ArgMappers, int32_t TeamNum,
                                      int32_t ThreadLimit) {
+  TIMESCOPE_WITH_IDENT(Loc);
+  __tgt_kernel_arguments KernelArgs{1,        ArgNum,   ArgsBase, Args,
+                                    ArgSizes, ArgTypes, ArgNames, ArgMappers};
+  return __tgt_target_kernel(Loc, DeviceId, TeamNum, ThreadLimit, HostPtr,
+                             &KernelArgs);
+}
+
+/// Implements a kernel entry that executes the target region on the specified
+/// device.
+///
+/// \param Loc Source location associated with this target region.
+/// \param DeviceId The device to execute this region, -1 indicates the default.
+/// \param NumTeams Number of teams to launch the region with, -1 indicates a
+///                 non-teams region and 0 indicates it was unspecified.
+/// \param ThreadLimit Limit to the number of threads to use in the kernel
+///                    launch, 0 indicates it was unspecified.
+/// \param HostPtr  The pointer to the host function registered with the kernel.
+/// \param Args     All arguments to this kernel launch (see struct definition).
+EXTERN int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
+                               int32_t ThreadLimit, void *HostPtr,
+                               __tgt_kernel_arguments *Args) {
+  TIMESCOPE_WITH_IDENT(Loc);
   DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64
      "\n",
      DPxPTR(HostPtr), DeviceId);
+  if (Args->Version != 1) {
+    DP("Unexpected ABI version: %d\n", Args->Version);
+  }
   if (checkDeviceAndCtors(DeviceId, Loc)) {
     DP("Not offloading to device %" PRId64 "\n", DeviceId);
     return OMP_TGT_FAIL;
   }
 
   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
-    printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
+    printKernelArguments(Loc, DeviceId, Args->NumArgs, Args->ArgSizes,
+                         Args->ArgTypes, Args->ArgNames,
                          "Entering OpenMP kernel");
 #ifdef OMPTARGET_DEBUG
-  for (int I = 0; I < ArgNum; ++I) {
+  for (int I = 0; I < Args->NumArgs; ++I) {
     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
        ", Type=0x%" PRIx64 ", Name=%s\n",
-       I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
-       (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
+       I, DPxPTR(Args->ArgBasePtrs[I]), DPxPTR(Args->ArgPtrs[I]),
+       Args->ArgSizes[I], Args->ArgTypes[I],
+       (Args->ArgNames) ? getNameFromMapping(Args->ArgNames[I]).c_str()
+                        : "unknown");
   }
 #endif
 
+  bool IsTeams = NumTeams != -1;
+  if (!IsTeams)
+    NumTeams = 0;
+
   DeviceTy &Device = *PM->Devices[DeviceId];
   AsyncInfoTy AsyncInfo(Device);
-  int Rc = target(Loc, Device, HostPtr, ArgNum, ArgsBase, Args, ArgSizes,
-                  ArgTypes, ArgNames, ArgMappers, TeamNum, ThreadLimit,
-                  true /*team*/, AsyncInfo);
+  int Rc = target(Loc, Device, HostPtr, Args->NumArgs, Args->ArgBasePtrs,
+                  Args->ArgPtrs, Args->ArgSizes, Args->ArgTypes, Args->ArgNames,
+                  Args->ArgMappers, NumTeams, ThreadLimit, IsTeams, AsyncInfo);
   if (Rc == OFFLOAD_SUCCESS)
     Rc = AsyncInfo.synchronize();
   handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
-  assert(Rc == OFFLOAD_SUCCESS &&
-         "__tgt_target_teams_mapper unexpected failure!");
+  assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_kernel unexpected failure!");
   return OMP_TGT_SUCCESS;
 }
 
-EXTERN int __tgt_target_teams_nowait_mapper(
-    ident_t *Loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum,
-    void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
-    map_var_info_t *ArgNames, void **ArgMappers, int32_t TeamNum,
-    int32_t ThreadLimit, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
-    void *NoAliasDepList) {
+EXTERN int __tgt_target_kernel_nowait(
+    ident_t *Loc, int64_t DeviceId, int32_t NumTeams, int32_t ThreadLimit,
+    void *HostPtr, __tgt_kernel_arguments *Args, int32_t DepNum, void *DepList,
+    int32_t NoAliasDepNum, void *NoAliasDepList) {
   TIMESCOPE_WITH_IDENT(Loc);
 
-  return __tgt_target_teams_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase,
-                                   Args, ArgSizes, ArgTypes, ArgNames,
-                                   ArgMappers, TeamNum, ThreadLimit);
+  return __tgt_target_kernel(Loc, DeviceId, NumTeams, ThreadLimit, HostPtr,
+                             Args);
 }
 
 // Get the current number of components for a user-defined mapper.


        


More information about the Openmp-commits mailing list