[Openmp-commits] [openmp] r302663 - [OpenMP] Changes in the plugin interface

George Rokos via Openmp-commits openmp-commits at lists.llvm.org
Wed May 10 07:12:36 PDT 2017


Author: grokos
Date: Wed May 10 09:12:36 2017
New Revision: 302663

URL: http://llvm.org/viewvc/llvm-project?rev=302663&view=rev
Log:
[OpenMP] Changes in the plugin interface

This patch changes the plugin interface so that:
1) future plugins can take advantage of systems with shared CPU/device storage
2) instead of using base addresses, target regions are launched by providing target addresses and base offsets explicitly.

Differential revision: https://reviews.llvm.org/D33028
 

Added:
    openmp/trunk/libomptarget/src/omptargetplugin.h
Modified:
    openmp/trunk/libomptarget/plugins/cuda/src/rtl.cpp
    openmp/trunk/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp
    openmp/trunk/libomptarget/src/omptarget.cpp
    openmp/trunk/libomptarget/src/omptarget.h

Modified: openmp/trunk/libomptarget/plugins/cuda/src/rtl.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/plugins/cuda/src/rtl.cpp?rev=302663&r1=302662&r2=302663&view=diff
==============================================================================
--- openmp/trunk/libomptarget/plugins/cuda/src/rtl.cpp (original)
+++ openmp/trunk/libomptarget/plugins/cuda/src/rtl.cpp Wed May 10 09:12:36 2017
@@ -19,7 +19,7 @@
 #include <string>
 #include <vector>
 
-#include "omptarget.h"
+#include "omptargetplugin.h"
 
 #ifndef TARGET_NAME
 #define TARGET_NAME CUDA
@@ -473,7 +473,7 @@ __tgt_target_table *__tgt_rtl_load_binar
   return DeviceInfo.getOffloadEntriesTable(device_id);
 }
 
-void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size) {
+void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *hst_ptr) {
   if (size == 0) {
     return NULL;
   }
@@ -559,8 +559,8 @@ int32_t __tgt_rtl_data_delete(int32_t de
 }
 
 int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr,
-    void **tgt_args, int32_t arg_num, int32_t team_num, int32_t thread_limit,
-    uint64_t loop_tripcount) {
+    void **tgt_args, ptrdiff_t *tgt_offsets, int32_t arg_num, int32_t team_num,
+    int32_t thread_limit, uint64_t loop_tripcount) {
   // Set the context we are using.
   CUresult err = cuCtxSetCurrent(DeviceInfo.Contexts[device_id]);
   if (err != CUDA_SUCCESS) {
@@ -571,9 +571,12 @@ int32_t __tgt_rtl_run_target_team_region
 
   // All args are references.
   std::vector<void *> args(arg_num);
+  std::vector<void *> ptrs(arg_num);
 
-  for (int32_t i = 0; i < arg_num; ++i)
-    args[i] = &tgt_args[i];
+  for (int32_t i = 0; i < arg_num; ++i) {
+    ptrs[i] = (void *)((intptr_t)tgt_args[i] + tgt_offsets[i]);
+    args[i] = &ptrs[i];
+  }
 
   KernelTy *KernelInfo = (KernelTy *)tgt_entry_ptr;
 
@@ -678,12 +681,12 @@ int32_t __tgt_rtl_run_target_team_region
 }
 
 int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr,
-    void **tgt_args, int32_t arg_num) {
+    void **tgt_args, ptrdiff_t *tgt_offsets, int32_t arg_num) {
   // use one team and the default number of threads.
   const int32_t team_num = 1;
   const int32_t thread_limit = 0;
   return __tgt_rtl_run_target_team_region(device_id, tgt_entry_ptr, tgt_args,
-      arg_num, team_num, thread_limit, 0);
+      tgt_offsets, arg_num, team_num, thread_limit, 0);
 }
 
 #ifdef __cplusplus

Modified: openmp/trunk/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp?rev=302663&r1=302662&r2=302663&view=diff
==============================================================================
--- openmp/trunk/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp (original)
+++ openmp/trunk/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp Wed May 10 09:12:36 2017
@@ -22,7 +22,7 @@
 #include <list>
 #include <vector>
 
-#include "omptarget.h"
+#include "omptargetplugin.h"
 
 #ifndef TARGET_NAME
 #define TARGET_NAME Generic ELF - 64bit
@@ -251,7 +251,7 @@ __tgt_target_table *__tgt_rtl_load_binar
   return DeviceInfo.getOffloadEntriesTable(device_id);
 }
 
-void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size) {
+void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *hst_ptr) {
   void *ptr = malloc(size);
   return ptr;
 }
@@ -274,8 +274,8 @@ int32_t __tgt_rtl_data_delete(int32_t de
 }
 
 int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr,
-    void **tgt_args, int32_t arg_num, int32_t team_num, int32_t thread_limit,
-    uint64_t loop_tripcount /*not used*/) {
+    void **tgt_args, ptrdiff_t *tgt_offsets, int32_t arg_num, int32_t team_num,
+    int32_t thread_limit, uint64_t loop_tripcount /*not used*/) {
   // ignore team num and thread limit.
 
   // Use libffi to launch execution.
@@ -284,9 +284,12 @@ int32_t __tgt_rtl_run_target_team_region
   // All args are references.
   std::vector<ffi_type *> args_types(arg_num, &ffi_type_pointer);
   std::vector<void *> args(arg_num);
+  std::vector<void *> ptrs(arg_num);
 
-  for (int32_t i = 0; i < arg_num; ++i)
-    args[i] = &tgt_args[i];
+  for (int32_t i = 0; i < arg_num; ++i) {
+    ptrs[i] = (void *)((intptr_t)tgt_args[i] + tgt_offsets[i]);
+    args[i] = &ptrs[i];
+  }
 
   ffi_status status = ffi_prep_cif(&cif, FFI_DEFAULT_ABI, arg_num,
                                    &ffi_type_void, &args_types[0]);
@@ -303,10 +306,10 @@ int32_t __tgt_rtl_run_target_team_region
 }
 
 int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr,
-                                    void **tgt_args, int32_t arg_num) {
+    void **tgt_args, ptrdiff_t *tgt_offsets, int32_t arg_num) {
   // use one team and one thread.
   return __tgt_rtl_run_target_team_region(device_id, tgt_entry_ptr, tgt_args,
-                                          arg_num, 1, 1, 0);
+      tgt_offsets, arg_num, 1, 1, 0);
 }
 
 #ifdef __cplusplus

Modified: openmp/trunk/libomptarget/src/omptarget.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/src/omptarget.cpp?rev=302663&r1=302662&r2=302663&view=diff
==============================================================================
--- openmp/trunk/libomptarget/src/omptarget.cpp (original)
+++ openmp/trunk/libomptarget/src/omptarget.cpp Wed May 10 09:12:36 2017
@@ -162,10 +162,11 @@ struct DeviceTy {
   int32_t data_submit(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size);
   int32_t data_retrieve(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size);
 
-  int32_t run_region(void *TgtEntryPtr, void **TgtVarsPtr, int32_t TgtVarsSize);
+  int32_t run_region(void *TgtEntryPtr, void **TgtVarsPtr,
+      ptrdiff_t *TgtOffsets, int32_t TgtVarsSize);
   int32_t run_team_region(void *TgtEntryPtr, void **TgtVarsPtr,
-      int32_t TgtVarsSize, int32_t NumTeams, int32_t ThreadLimit,
-      uint64_t LoopTripCount);
+      ptrdiff_t *TgtOffsets, int32_t TgtVarsSize, int32_t NumTeams,
+      int32_t ThreadLimit, uint64_t LoopTripCount);
 
 private:
   // Call to RTL
@@ -181,13 +182,14 @@ struct RTLInfoTy {
   typedef int32_t(number_of_devices_ty)();
   typedef int32_t(init_device_ty)(int32_t);
   typedef __tgt_target_table *(load_binary_ty)(int32_t, void *);
-  typedef void *(data_alloc_ty)(int32_t, int64_t);
+  typedef void *(data_alloc_ty)(int32_t, int64_t, void *);
   typedef int32_t(data_submit_ty)(int32_t, void *, void *, int64_t);
   typedef int32_t(data_retrieve_ty)(int32_t, void *, void *, int64_t);
   typedef int32_t(data_delete_ty)(int32_t, void *);
-  typedef int32_t(run_region_ty)(int32_t, void *, void **, int32_t);
-  typedef int32_t(run_team_region_ty)(int32_t, void *, void **, int32_t,
-                                      int32_t, int32_t, uint64_t);
+  typedef int32_t(run_region_ty)(int32_t, void *, void **, ptrdiff_t *,
+                                 int32_t);
+  typedef int32_t(run_team_region_ty)(int32_t, void *, void **, ptrdiff_t *,
+                                      int32_t, int32_t, int32_t, uint64_t);
 
   int32_t Idx;                     // RTL index, index is the number of devices
                                    // of other RTLs that were registered before,
@@ -471,7 +473,7 @@ EXTERN void *omp_target_alloc(size_t siz
   }
 
   DeviceTy &Device = Devices[device_num];
-  rc = Device.RTL->data_alloc(Device.RTLDeviceID, size);
+  rc = Device.RTL->data_alloc(Device.RTLDeviceID, size, NULL);
   DP("omp_target_alloc returns device ptr " DPxMOD "\n", DPxPTR(rc));
   return rc;
 }
@@ -861,7 +863,7 @@ void *DeviceTy::getOrAllocTgtPtr(void *H
   } else if (Size) {
     // If it is not contained and Size > 0 we should create a new entry for it.
     IsNew = true;
-    uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size);
+    uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin);
     DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", "
         "HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase),
         DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp));
@@ -995,16 +997,17 @@ int32_t DeviceTy::data_retrieve(void *Hs
 
 // Run region on device
 int32_t DeviceTy::run_region(void *TgtEntryPtr, void **TgtVarsPtr,
-    int32_t TgtVarsSize) {
-  return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtVarsSize);
+    ptrdiff_t *TgtOffsets, int32_t TgtVarsSize) {
+  return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
+      TgtVarsSize);
 }
 
 // Run team region on device.
 int32_t DeviceTy::run_team_region(void *TgtEntryPtr, void **TgtVarsPtr,
-    int32_t TgtVarsSize, int32_t NumTeams, int32_t ThreadLimit,
-    uint64_t LoopTripCount) {
-  return RTL->run_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtVarsSize,
-      NumTeams, ThreadLimit, LoopTripCount);
+    ptrdiff_t *TgtOffsets, int32_t TgtVarsSize, int32_t NumTeams,
+    int32_t ThreadLimit, uint64_t LoopTripCount) {
+  return RTL->run_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
+      TgtVarsSize, NumTeams, ThreadLimit, LoopTripCount);
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -2108,6 +2111,7 @@ static int target(int32_t device_id, voi
   }
 
   std::vector<void *> tgt_args;
+  std::vector<ptrdiff_t> tgt_offsets;
 
   // List of (first-)private arrays allocated for this target region
   std::vector<void *> fpArrays;
@@ -2119,16 +2123,18 @@ static int target(int32_t device_id, voi
     }
     void *HstPtrBegin = args[i];
     void *HstPtrBase = args_base[i];
-    void *TgtPtrBase;
+    void *TgtPtrBegin;
+    ptrdiff_t TgtBaseOffset;
     bool IsLast; // unused.
     if (arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) {
       DP("Forwarding first-private value " DPxMOD " to the target construct\n",
           DPxPTR(HstPtrBase));
-      TgtPtrBase = HstPtrBase;
+      TgtPtrBegin = HstPtrBase;
+      TgtBaseOffset = 0;
     } else if (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE) {
       // Allocate memory for (first-)private array
-      void *TgtPtrBegin = Device.RTL->data_alloc(Device.RTLDeviceID,
-          arg_sizes[i]);
+      TgtPtrBegin = Device.RTL->data_alloc(Device.RTLDeviceID,
+          arg_sizes[i], HstPtrBegin);
       if (!TgtPtrBegin) {
         DP ("Data allocation for %sprivate array " DPxMOD " failed\n",
             (arg_types[i] & OMP_TGT_MAPTYPE_TO ? "first-" : ""),
@@ -2137,8 +2143,8 @@ static int target(int32_t device_id, voi
         break;
       } else {
         fpArrays.push_back(TgtPtrBegin);
-        uint64_t PtrDelta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
-        TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - PtrDelta);
+        TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
+        void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);
         DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD " for "
             "%sprivate array " DPxMOD " - pushing target argument " DPxMOD "\n",
             arg_sizes[i], DPxPTR(TgtPtrBegin),
@@ -2155,24 +2161,29 @@ static int target(int32_t device_id, voi
         }
       }
     } else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
-      void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *),
-          IsLast, false);
-      TgtPtrBase = TgtPtrBegin; // no offset for ptrs.
+      TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast,
+          false);
+      TgtBaseOffset = 0; // no offset for ptrs.
       DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to "
          "object " DPxMOD "\n", DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBase),
          DPxPTR(HstPtrBase));
     } else {
-      void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i],
-          IsLast, false);
-      uint64_t PtrDelta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
-      TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - PtrDelta);
+      TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast,
+          false);
+      TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
+      void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);
       DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD "\n",
           DPxPTR(TgtPtrBase), DPxPTR(HstPtrBegin));
     }
-    tgt_args.push_back(TgtPtrBase);
+    tgt_args.push_back(TgtPtrBegin);
+    tgt_offsets.push_back(TgtBaseOffset);
   }
   // Push omp handle.
   tgt_args.push_back((void *)0);
+  tgt_offsets.push_back(0);
+
+  assert(tgt_args.size() == tgt_offsets.size() &&
+      "Size mismatch in arguments and offsets");
 
   // Pop loop trip count
   uint64_t ltc = Device.loopTripCnt;
@@ -2185,10 +2196,11 @@ static int target(int32_t device_id, voi
         DPxPTR(TargetTable->EntriesBegin[TM->Index].addr), TM->Index);
     if (IsTeamConstruct) {
       rc = Device.run_team_region(TargetTable->EntriesBegin[TM->Index].addr,
-          &tgt_args[0], tgt_args.size(), team_num, thread_limit, ltc);
+          &tgt_args[0], &tgt_offsets[0], tgt_args.size(), team_num,
+          thread_limit, ltc);
     } else {
       rc = Device.run_region(TargetTable->EntriesBegin[TM->Index].addr,
-          &tgt_args[0], tgt_args.size());
+          &tgt_args[0], &tgt_offsets[0], tgt_args.size());
     }
   } else {
     DP("Errors occurred while obtaining target arguments, skipping kernel "

Modified: openmp/trunk/libomptarget/src/omptarget.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/src/omptarget.h?rev=302663&r1=302662&r2=302663&view=diff
==============================================================================
--- openmp/trunk/libomptarget/src/omptarget.h (original)
+++ openmp/trunk/libomptarget/src/omptarget.h Wed May 10 09:12:36 2017
@@ -16,6 +16,7 @@
 #define _OMPTARGET_H_
 
 #include <stdint.h>
+#include <stddef.h>
 
 #define OFFLOAD_SUCCESS (0)
 #define OFFLOAD_FAIL (~0)

Added: openmp/trunk/libomptarget/src/omptargetplugin.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/src/omptargetplugin.h?rev=302663&view=auto
==============================================================================
--- openmp/trunk/libomptarget/src/omptargetplugin.h (added)
+++ openmp/trunk/libomptarget/src/omptargetplugin.h Wed May 10 09:12:36 2017
@@ -0,0 +1,92 @@
+//===-- omptargetplugin.h - Target dependent OpenMP Plugin API --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an interface between target independent OpenMP offload
+// runtime library libomptarget and target dependent plugin.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _OMPTARGETPLUGIN_H_
+#define _OMPTARGETPLUGIN_H_
+
+#include <omptarget.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Return the number of available devices of the type supported by the
+// target RTL.
+int32_t __tgt_rtl_number_of_devices(void);
+
+// Return an integer different from zero if the provided device image can be
+// supported by the runtime. The functionality is similar to comparing the
+// result of __tgt_rtl_load_binary to NULL. However, this is meant to be a
+// lightweight query to determine if the RTL is suitable for an image without
+// having to load the library, which can be expensive.
+int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image);
+
+// Initialize the specified device. In case of success return 0; otherwise
+// return an error code.
+int32_t __tgt_rtl_init_device(int32_t ID);
+
+// Pass an executable image section described by image to the specified
+// device and prepare an address table of target entities. In case of error,
+// return NULL. Otherwise, return a pointer to the built address table.
+// Individual entries in the table may also be NULL, when the corresponding
+// offload region is not supported on the target device.
+__tgt_target_table *__tgt_rtl_load_binary(int32_t ID,
+                                          __tgt_device_image *Image);
+
+// Allocate data on the particular target device, of the specified size.
+// HostPtr is an address of the host data the allocated target data
+// will be associated with (HostPtr may be NULL if it is not known at
+// allocation time, like for example it would be for target data that
+// is allocated by omp_target_alloc() API). Return address of the
+// allocated data on the target that will be used by libomptarget.so to
+// initialize the target data mapping structures. These addresses are
+// used to generate a table of target variables to pass to
+// __tgt_rtl_run_target_region(). The __tgt_rtl_data_alloc() returns NULL in
+// case an error occurred on the target device.
+void *__tgt_rtl_data_alloc(int32_t ID, int64_t Size, void *HostPtr);
+
+// Pass the data content to the target device using the target address.
+// In case of success, return zero. Otherwise, return an error code.
+int32_t __tgt_rtl_data_submit(int32_t ID, void *TargetPtr, void *HostPtr,
+                              int64_t Size);
+
+// Retrieve the data content from the target device using its address.
+// In case of success, return zero. Otherwise, return an error code.
+int32_t __tgt_rtl_data_retrieve(int32_t ID, void *HostPtr, void *TargetPtr,
+                                int64_t Size);
+
+// De-allocate the data referenced by target ptr on the device. In case of
+// success, return zero. Otherwise, return an error code.
+int32_t __tgt_rtl_data_delete(int32_t ID, void *TargetPtr);
+
+// Transfer control to the offloaded entry Entry on the target device.
+// Args and Offsets are arrays of NumArgs size of target addresses and
+// offsets. An offset should be added to the target address before passing it
+// to the outlined function on device side. In case of success, return zero.
+// Otherwise, return an error code.
+int32_t __tgt_rtl_run_target_region(int32_t ID, void *Entry, void **Args,
+                                    ptrdiff_t *Offsets, int32_t NumArgs);
+
+// Similar to __tgt_rtl_run_target_region, but additionally specify the
+// number of teams to be created and a number of threads in each team.
+int32_t __tgt_rtl_run_target_team_region(int32_t ID, void *Entry, void **Args,
+                                         ptrdiff_t *Offsets, int32_t NumArgs,
+                                         int32_t NumTeams, int32_t ThreadLimit,
+                                         uint64_t loop_tripcount);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _OMPTARGETPLUGIN_H_




More information about the Openmp-commits mailing list