[Openmp-commits] [openmp] r302663 - [OpenMP] Changes in the plugin interface
George Rokos via Openmp-commits
openmp-commits at lists.llvm.org
Wed May 10 07:12:36 PDT 2017
Author: grokos
Date: Wed May 10 09:12:36 2017
New Revision: 302663
URL: http://llvm.org/viewvc/llvm-project?rev=302663&view=rev
Log:
[OpenMP] Changes in the plugin interface
This patch changes the plugin interface so that:
1) future plugins can take advantage of systems with shared CPU/device storage
2) instead of using base addresses, target regions are launched by providing target addresses and base offsets explicitly.
Differential revision: https://reviews.llvm.org/D33028
Added:
openmp/trunk/libomptarget/src/omptargetplugin.h
Modified:
openmp/trunk/libomptarget/plugins/cuda/src/rtl.cpp
openmp/trunk/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp
openmp/trunk/libomptarget/src/omptarget.cpp
openmp/trunk/libomptarget/src/omptarget.h
Modified: openmp/trunk/libomptarget/plugins/cuda/src/rtl.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/plugins/cuda/src/rtl.cpp?rev=302663&r1=302662&r2=302663&view=diff
==============================================================================
--- openmp/trunk/libomptarget/plugins/cuda/src/rtl.cpp (original)
+++ openmp/trunk/libomptarget/plugins/cuda/src/rtl.cpp Wed May 10 09:12:36 2017
@@ -19,7 +19,7 @@
#include <string>
#include <vector>
-#include "omptarget.h"
+#include "omptargetplugin.h"
#ifndef TARGET_NAME
#define TARGET_NAME CUDA
@@ -473,7 +473,7 @@ __tgt_target_table *__tgt_rtl_load_binar
return DeviceInfo.getOffloadEntriesTable(device_id);
}
-void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size) {
+void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *hst_ptr) {
if (size == 0) {
return NULL;
}
@@ -559,8 +559,8 @@ int32_t __tgt_rtl_data_delete(int32_t de
}
int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr,
- void **tgt_args, int32_t arg_num, int32_t team_num, int32_t thread_limit,
- uint64_t loop_tripcount) {
+ void **tgt_args, ptrdiff_t *tgt_offsets, int32_t arg_num, int32_t team_num,
+ int32_t thread_limit, uint64_t loop_tripcount) {
// Set the context we are using.
CUresult err = cuCtxSetCurrent(DeviceInfo.Contexts[device_id]);
if (err != CUDA_SUCCESS) {
@@ -571,9 +571,12 @@ int32_t __tgt_rtl_run_target_team_region
// All args are references.
std::vector<void *> args(arg_num);
+ std::vector<void *> ptrs(arg_num);
- for (int32_t i = 0; i < arg_num; ++i)
- args[i] = &tgt_args[i];
+ for (int32_t i = 0; i < arg_num; ++i) {
+ ptrs[i] = (void *)((intptr_t)tgt_args[i] + tgt_offsets[i]);
+ args[i] = &ptrs[i];
+ }
KernelTy *KernelInfo = (KernelTy *)tgt_entry_ptr;
@@ -678,12 +681,12 @@ int32_t __tgt_rtl_run_target_team_region
}
int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr,
- void **tgt_args, int32_t arg_num) {
+ void **tgt_args, ptrdiff_t *tgt_offsets, int32_t arg_num) {
// use one team and the default number of threads.
const int32_t team_num = 1;
const int32_t thread_limit = 0;
return __tgt_rtl_run_target_team_region(device_id, tgt_entry_ptr, tgt_args,
- arg_num, team_num, thread_limit, 0);
+ tgt_offsets, arg_num, team_num, thread_limit, 0);
}
#ifdef __cplusplus
Modified: openmp/trunk/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp?rev=302663&r1=302662&r2=302663&view=diff
==============================================================================
--- openmp/trunk/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp (original)
+++ openmp/trunk/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp Wed May 10 09:12:36 2017
@@ -22,7 +22,7 @@
#include <list>
#include <vector>
-#include "omptarget.h"
+#include "omptargetplugin.h"
#ifndef TARGET_NAME
#define TARGET_NAME Generic ELF - 64bit
@@ -251,7 +251,7 @@ __tgt_target_table *__tgt_rtl_load_binar
return DeviceInfo.getOffloadEntriesTable(device_id);
}
-void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size) {
+void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *hst_ptr) {
void *ptr = malloc(size);
return ptr;
}
@@ -274,8 +274,8 @@ int32_t __tgt_rtl_data_delete(int32_t de
}
int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr,
- void **tgt_args, int32_t arg_num, int32_t team_num, int32_t thread_limit,
- uint64_t loop_tripcount /*not used*/) {
+ void **tgt_args, ptrdiff_t *tgt_offsets, int32_t arg_num, int32_t team_num,
+ int32_t thread_limit, uint64_t loop_tripcount /*not used*/) {
// ignore team num and thread limit.
// Use libffi to launch execution.
@@ -284,9 +284,12 @@ int32_t __tgt_rtl_run_target_team_region
// All args are references.
std::vector<ffi_type *> args_types(arg_num, &ffi_type_pointer);
std::vector<void *> args(arg_num);
+ std::vector<void *> ptrs(arg_num);
- for (int32_t i = 0; i < arg_num; ++i)
- args[i] = &tgt_args[i];
+ for (int32_t i = 0; i < arg_num; ++i) {
+ ptrs[i] = (void *)((intptr_t)tgt_args[i] + tgt_offsets[i]);
+ args[i] = &ptrs[i];
+ }
ffi_status status = ffi_prep_cif(&cif, FFI_DEFAULT_ABI, arg_num,
&ffi_type_void, &args_types[0]);
@@ -303,10 +306,10 @@ int32_t __tgt_rtl_run_target_team_region
}
int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr,
- void **tgt_args, int32_t arg_num) {
+ void **tgt_args, ptrdiff_t *tgt_offsets, int32_t arg_num) {
// use one team and one thread.
return __tgt_rtl_run_target_team_region(device_id, tgt_entry_ptr, tgt_args,
- arg_num, 1, 1, 0);
+ tgt_offsets, arg_num, 1, 1, 0);
}
#ifdef __cplusplus
Modified: openmp/trunk/libomptarget/src/omptarget.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/src/omptarget.cpp?rev=302663&r1=302662&r2=302663&view=diff
==============================================================================
--- openmp/trunk/libomptarget/src/omptarget.cpp (original)
+++ openmp/trunk/libomptarget/src/omptarget.cpp Wed May 10 09:12:36 2017
@@ -162,10 +162,11 @@ struct DeviceTy {
int32_t data_submit(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size);
int32_t data_retrieve(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size);
- int32_t run_region(void *TgtEntryPtr, void **TgtVarsPtr, int32_t TgtVarsSize);
+ int32_t run_region(void *TgtEntryPtr, void **TgtVarsPtr,
+ ptrdiff_t *TgtOffsets, int32_t TgtVarsSize);
int32_t run_team_region(void *TgtEntryPtr, void **TgtVarsPtr,
- int32_t TgtVarsSize, int32_t NumTeams, int32_t ThreadLimit,
- uint64_t LoopTripCount);
+ ptrdiff_t *TgtOffsets, int32_t TgtVarsSize, int32_t NumTeams,
+ int32_t ThreadLimit, uint64_t LoopTripCount);
private:
// Call to RTL
@@ -181,13 +182,14 @@ struct RTLInfoTy {
typedef int32_t(number_of_devices_ty)();
typedef int32_t(init_device_ty)(int32_t);
typedef __tgt_target_table *(load_binary_ty)(int32_t, void *);
- typedef void *(data_alloc_ty)(int32_t, int64_t);
+ typedef void *(data_alloc_ty)(int32_t, int64_t, void *);
typedef int32_t(data_submit_ty)(int32_t, void *, void *, int64_t);
typedef int32_t(data_retrieve_ty)(int32_t, void *, void *, int64_t);
typedef int32_t(data_delete_ty)(int32_t, void *);
- typedef int32_t(run_region_ty)(int32_t, void *, void **, int32_t);
- typedef int32_t(run_team_region_ty)(int32_t, void *, void **, int32_t,
- int32_t, int32_t, uint64_t);
+ typedef int32_t(run_region_ty)(int32_t, void *, void **, ptrdiff_t *,
+ int32_t);
+ typedef int32_t(run_team_region_ty)(int32_t, void *, void **, ptrdiff_t *,
+ int32_t, int32_t, int32_t, uint64_t);
int32_t Idx; // RTL index, index is the number of devices
// of other RTLs that were registered before,
@@ -471,7 +473,7 @@ EXTERN void *omp_target_alloc(size_t siz
}
DeviceTy &Device = Devices[device_num];
- rc = Device.RTL->data_alloc(Device.RTLDeviceID, size);
+ rc = Device.RTL->data_alloc(Device.RTLDeviceID, size, NULL);
DP("omp_target_alloc returns device ptr " DPxMOD "\n", DPxPTR(rc));
return rc;
}
@@ -861,7 +863,7 @@ void *DeviceTy::getOrAllocTgtPtr(void *H
} else if (Size) {
// If it is not contained and Size > 0 we should create a new entry for it.
IsNew = true;
- uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size);
+ uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin);
DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", "
"HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase),
DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp));
@@ -995,16 +997,17 @@ int32_t DeviceTy::data_retrieve(void *Hs
// Run region on device
int32_t DeviceTy::run_region(void *TgtEntryPtr, void **TgtVarsPtr,
- int32_t TgtVarsSize) {
- return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtVarsSize);
+ ptrdiff_t *TgtOffsets, int32_t TgtVarsSize) {
+ return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
+ TgtVarsSize);
}
// Run team region on device.
int32_t DeviceTy::run_team_region(void *TgtEntryPtr, void **TgtVarsPtr,
- int32_t TgtVarsSize, int32_t NumTeams, int32_t ThreadLimit,
- uint64_t LoopTripCount) {
- return RTL->run_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtVarsSize,
- NumTeams, ThreadLimit, LoopTripCount);
+ ptrdiff_t *TgtOffsets, int32_t TgtVarsSize, int32_t NumTeams,
+ int32_t ThreadLimit, uint64_t LoopTripCount) {
+ return RTL->run_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
+ TgtVarsSize, NumTeams, ThreadLimit, LoopTripCount);
}
////////////////////////////////////////////////////////////////////////////////
@@ -2108,6 +2111,7 @@ static int target(int32_t device_id, voi
}
std::vector<void *> tgt_args;
+ std::vector<ptrdiff_t> tgt_offsets;
// List of (first-)private arrays allocated for this target region
std::vector<void *> fpArrays;
@@ -2119,16 +2123,18 @@ static int target(int32_t device_id, voi
}
void *HstPtrBegin = args[i];
void *HstPtrBase = args_base[i];
- void *TgtPtrBase;
+ void *TgtPtrBegin;
+ ptrdiff_t TgtBaseOffset;
bool IsLast; // unused.
if (arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) {
DP("Forwarding first-private value " DPxMOD " to the target construct\n",
DPxPTR(HstPtrBase));
- TgtPtrBase = HstPtrBase;
+ TgtPtrBegin = HstPtrBase;
+ TgtBaseOffset = 0;
} else if (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE) {
// Allocate memory for (first-)private array
- void *TgtPtrBegin = Device.RTL->data_alloc(Device.RTLDeviceID,
- arg_sizes[i]);
+ TgtPtrBegin = Device.RTL->data_alloc(Device.RTLDeviceID,
+ arg_sizes[i], HstPtrBegin);
if (!TgtPtrBegin) {
DP ("Data allocation for %sprivate array " DPxMOD " failed\n",
(arg_types[i] & OMP_TGT_MAPTYPE_TO ? "first-" : ""),
@@ -2137,8 +2143,8 @@ static int target(int32_t device_id, voi
break;
} else {
fpArrays.push_back(TgtPtrBegin);
- uint64_t PtrDelta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
- TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - PtrDelta);
+ TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
+ void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);
DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD " for "
"%sprivate array " DPxMOD " - pushing target argument " DPxMOD "\n",
arg_sizes[i], DPxPTR(TgtPtrBegin),
@@ -2155,24 +2161,29 @@ static int target(int32_t device_id, voi
}
}
} else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
- void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *),
- IsLast, false);
- TgtPtrBase = TgtPtrBegin; // no offset for ptrs.
+ TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast,
+ false);
+ TgtBaseOffset = 0; // no offset for ptrs.
DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to "
"object " DPxMOD "\n", DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBase),
DPxPTR(HstPtrBase));
} else {
- void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i],
- IsLast, false);
- uint64_t PtrDelta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
- TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - PtrDelta);
+ TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast,
+ false);
+ TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
+ void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);
DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD "\n",
DPxPTR(TgtPtrBase), DPxPTR(HstPtrBegin));
}
- tgt_args.push_back(TgtPtrBase);
+ tgt_args.push_back(TgtPtrBegin);
+ tgt_offsets.push_back(TgtBaseOffset);
}
// Push omp handle.
tgt_args.push_back((void *)0);
+ tgt_offsets.push_back(0);
+
+ assert(tgt_args.size() == tgt_offsets.size() &&
+ "Size mismatch in arguments and offsets");
// Pop loop trip count
uint64_t ltc = Device.loopTripCnt;
@@ -2185,10 +2196,11 @@ static int target(int32_t device_id, voi
DPxPTR(TargetTable->EntriesBegin[TM->Index].addr), TM->Index);
if (IsTeamConstruct) {
rc = Device.run_team_region(TargetTable->EntriesBegin[TM->Index].addr,
- &tgt_args[0], tgt_args.size(), team_num, thread_limit, ltc);
+ &tgt_args[0], &tgt_offsets[0], tgt_args.size(), team_num,
+ thread_limit, ltc);
} else {
rc = Device.run_region(TargetTable->EntriesBegin[TM->Index].addr,
- &tgt_args[0], tgt_args.size());
+ &tgt_args[0], &tgt_offsets[0], tgt_args.size());
}
} else {
DP("Errors occurred while obtaining target arguments, skipping kernel "
Modified: openmp/trunk/libomptarget/src/omptarget.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/src/omptarget.h?rev=302663&r1=302662&r2=302663&view=diff
==============================================================================
--- openmp/trunk/libomptarget/src/omptarget.h (original)
+++ openmp/trunk/libomptarget/src/omptarget.h Wed May 10 09:12:36 2017
@@ -16,6 +16,7 @@
#define _OMPTARGET_H_
#include <stdint.h>
+#include <stddef.h>
#define OFFLOAD_SUCCESS (0)
#define OFFLOAD_FAIL (~0)
Added: openmp/trunk/libomptarget/src/omptargetplugin.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/src/omptargetplugin.h?rev=302663&view=auto
==============================================================================
--- openmp/trunk/libomptarget/src/omptargetplugin.h (added)
+++ openmp/trunk/libomptarget/src/omptargetplugin.h Wed May 10 09:12:36 2017
@@ -0,0 +1,92 @@
+//===-- omptargetplugin.h - Target dependent OpenMP Plugin API --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an interface between target independent OpenMP offload
+// runtime library libomptarget and target dependent plugin.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _OMPTARGETPLUGIN_H_
+#define _OMPTARGETPLUGIN_H_
+
+#include <omptarget.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Return the number of available devices of the type supported by the
+// target RTL.
+int32_t __tgt_rtl_number_of_devices(void);
+
+// Return an integer different from zero if the provided device image can be
+// supported by the runtime. The functionality is similar to comparing the
+// result of __tgt_rtl_load_binary to NULL. However, this is meant to be a
+// lightweight query to determine if the RTL is suitable for an image without
+// having to load the library, which can be expensive.
+int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image);
+
+// Initialize the specified device. In case of success return 0; otherwise
+// return an error code.
+int32_t __tgt_rtl_init_device(int32_t ID);
+
+// Pass an executable image section described by image to the specified
+// device and prepare an address table of target entities. In case of error,
+// return NULL. Otherwise, return a pointer to the built address table.
+// Individual entries in the table may also be NULL, when the corresponding
+// offload region is not supported on the target device.
+__tgt_target_table *__tgt_rtl_load_binary(int32_t ID,
+ __tgt_device_image *Image);
+
+// Allocate data on the particular target device, of the specified size.
+// HostPtr is an address of the host data the allocated target data
+// will be associated with (HostPtr may be NULL if it is not known at
+// allocation time, like for example it would be for target data that
+// is allocated by omp_target_alloc() API). Return address of the
+// allocated data on the target that will be used by libomptarget.so to
+// initialize the target data mapping structures. These addresses are
+// used to generate a table of target variables to pass to
+// __tgt_rtl_run_target_region(). The __tgt_rtl_data_alloc() returns NULL in
+// case an error occurred on the target device.
+void *__tgt_rtl_data_alloc(int32_t ID, int64_t Size, void *HostPtr);
+
+// Pass the data content to the target device using the target address.
+// In case of success, return zero. Otherwise, return an error code.
+int32_t __tgt_rtl_data_submit(int32_t ID, void *TargetPtr, void *HostPtr,
+ int64_t Size);
+
+// Retrieve the data content from the target device using its address.
+// In case of success, return zero. Otherwise, return an error code.
+int32_t __tgt_rtl_data_retrieve(int32_t ID, void *HostPtr, void *TargetPtr,
+ int64_t Size);
+
+// De-allocate the data referenced by target ptr on the device. In case of
+// success, return zero. Otherwise, return an error code.
+int32_t __tgt_rtl_data_delete(int32_t ID, void *TargetPtr);
+
+// Transfer control to the offloaded entry Entry on the target device.
+// Args and Offsets are arrays of NumArgs size of target addresses and
+// offsets. An offset should be added to the target address before passing it
+// to the outlined function on device side. In case of success, return zero.
+// Otherwise, return an error code.
+int32_t __tgt_rtl_run_target_region(int32_t ID, void *Entry, void **Args,
+ ptrdiff_t *Offsets, int32_t NumArgs);
+
+// Similar to __tgt_rtl_run_target_region, but additionally specify the
+// number of teams to be created and a number of threads in each team.
+int32_t __tgt_rtl_run_target_team_region(int32_t ID, void *Entry, void **Args,
+ ptrdiff_t *Offsets, int32_t NumArgs,
+ int32_t NumTeams, int32_t ThreadLimit,
+ uint64_t loop_tripcount);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _OMPTARGETPLUGIN_H_
More information about the Openmp-commits
mailing list