[Openmp-commits] [openmp] 6b9e43c - [Openmp][VE] Libomptarget plugin for NEC SX-Aurora

Simon Moll via Openmp-commits openmp-commits at lists.llvm.org
Tue May 12 01:48:20 PDT 2020


Author: Manoel Roemmer
Date: 2020-05-12T10:47:30+02:00
New Revision: 6b9e43c67e0b109881db2524ec5207c4a888e7f7

URL: https://github.com/llvm/llvm-project/commit/6b9e43c67e0b109881db2524ec5207c4a888e7f7
DIFF: https://github.com/llvm/llvm-project/commit/6b9e43c67e0b109881db2524ec5207c4a888e7f7.diff

LOG: [Openmp][VE] Libomptarget plugin for NEC SX-Aurora

This patch adds a libomptarget plugin for the NEC SX-Aurora TSUBASA Vector
Engine (VE target).  The code is largely based on the existing generic-elf
plugin and uses the NEC VEO and VEOSINFO libraries for offloading.

Differential Revision: https://reviews.llvm.org/D76843

Added: 
    openmp/libomptarget/plugins/ve/CMakeLists.txt
    openmp/libomptarget/plugins/ve/src/rtl.cpp

Modified: 
    openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
    openmp/libomptarget/plugins/CMakeLists.txt
    openmp/libomptarget/plugins/common/elf_common.c
    openmp/libomptarget/src/rtl.cpp

Removed: 
    


################################################################################
diff --git a/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake b/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
index dbf8c381de13..95254e7a9e12 100644
--- a/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
+++ b/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
@@ -15,6 +15,7 @@
 # libffi : required to launch target kernels given function and argument 
 #          pointers.
 # CUDA : required to control offloading to NVIDIA GPUs.
+# VEOS : required to control offloading to NEC Aurora.
 
 include (FindPackageHandleStandardArgs)
 
@@ -162,6 +163,61 @@ find_package_handle_standard_args(
 mark_as_advanced(LIBOMPTARGET_DEP_CUDA_DRIVER_LIBRARIES)
 
 ################################################################################
+# Looking for VEO...
+################################################################################
+
+find_path (
+  LIBOMPTARGET_DEP_VEO_INCLUDE_DIR
+  NAMES
+    ve_offload.h
+  PATHS
+    /usr/include
+    /usr/local/include
+    /opt/local/include
+    /sw/include
+    /opt/nec/ve/veos/include
+    ENV CPATH
+  PATH_SUFFIXES
+    libveo)
+
+find_library (
+  LIBOMPTARGET_DEP_VEO_LIBRARIES
+  NAMES
+    veo
+  PATHS
+    /usr/lib
+    /usr/local/lib
+    /opt/local/lib
+    /sw/lib
+    /opt/nec/ve/veos/lib64
+    ENV LIBRARY_PATH
+    ENV LD_LIBRARY_PATH)
+
+find_library(
+  LIBOMPTARGET_DEP_VEOSINFO_LIBRARIES
+  NAMES
+    veosinfo
+  PATHS
+    /usr/lib
+    /usr/local/lib
+    /opt/local/lib
+    /sw/lib
+    /opt/nec/ve/veos/lib64
+    ENV LIBRARY_PATH
+    ENV LD_LIBRARY_PATH)
+
+set(LIBOMPTARGET_DEP_VEO_INCLUDE_DIRS ${LIBOMPTARGET_DEP_VEO_INCLUDE_DIR})
+find_package_handle_standard_args(
+  LIBOMPTARGET_DEP_VEO
+  DEFAULT_MSG
+  LIBOMPTARGET_DEP_VEO_LIBRARIES
+  LIBOMPTARGET_DEP_VEOSINFO_LIBRARIES
+  LIBOMPTARGET_DEP_VEO_INCLUDE_DIRS)
+
+mark_as_advanced(
+  LIBOMPTARGET_DEP_VEO_FOUND
+  LIBOMPTARGET_DEP_VEO_INCLUDE_DIRS)
+
 # Looking for CUDA libdevice subdirectory
 #
 # Special case for Debian/Ubuntu to have nvidia-cuda-toolkit work

diff --git a/openmp/libomptarget/plugins/CMakeLists.txt b/openmp/libomptarget/plugins/CMakeLists.txt
index bb3f9c908087..f8372b619407 100644
--- a/openmp/libomptarget/plugins/CMakeLists.txt
+++ b/openmp/libomptarget/plugins/CMakeLists.txt
@@ -69,6 +69,7 @@ add_subdirectory(aarch64)
 add_subdirectory(cuda)
 add_subdirectory(ppc64)
 add_subdirectory(ppc64le)
+add_subdirectory(ve)
 add_subdirectory(x86_64)
 
 # Make sure the parent scope can see the plugins that will be created.

diff --git a/openmp/libomptarget/plugins/common/elf_common.c b/openmp/libomptarget/plugins/common/elf_common.c
index b0efd1abc148..8cbf8aadca28 100644
--- a/openmp/libomptarget/plugins/common/elf_common.c
+++ b/openmp/libomptarget/plugins/common/elf_common.c
@@ -71,3 +71,41 @@ static inline int32_t elf_check_machine(__tgt_device_image *image,
   elf_end(e);
   return MachineID == target_id;
 }
+
+static inline int32_t elf_is_dynamic(__tgt_device_image *image) {
+
+  char *img_begin = (char *)image->ImageStart;
+  char *img_end = (char *)image->ImageEnd;
+  size_t img_size = img_end - img_begin;
+
+  // Obtain elf handler
+  Elf *e = elf_memory(img_begin, img_size);
+  if (!e) {
+    DP("Unable to get ELF handle: %s!\n", elf_errmsg(-1));
+    return 0;
+  }
+
+  Elf64_Ehdr *eh64 = elf64_getehdr(e);
+  Elf32_Ehdr *eh32 = elf32_getehdr(e);
+
+  if (!eh64 && !eh32) {
+    DP("Unable to get machine ID from ELF file!\n");
+    elf_end(e);
+    return 0;
+  }
+
+  uint16_t Type;
+  if (eh64 && !eh32)
+    Type = eh64->e_type;
+  else if (eh32 && !eh64)
+    Type = eh32->e_type;
+  else {
+    DP("Ambiguous ELF header!\n");
+    elf_end(e);
+    return 0;
+  }
+
+  elf_end(e);
+  DP("ELF Type: %d\n", Type);
+  return Type == ET_DYN;
+}

diff --git a/openmp/libomptarget/plugins/ve/CMakeLists.txt b/openmp/libomptarget/plugins/ve/CMakeLists.txt
new file mode 100644
index 000000000000..3355d7347aee
--- /dev/null
+++ b/openmp/libomptarget/plugins/ve/CMakeLists.txt
@@ -0,0 +1,49 @@
+##===----------------------------------------------------------------------===##
+#
+# Build a plugin for a NEC Aurora machine if available. (Can also run on host)
+#
+##===----------------------------------------------------------------------===##
+
+
+if(${LIBOMPTARGET_DEP_VEO_FOUND})
+  libomptarget_say("Building SX-Aurora VE offloading plugin.")
+  set(additional_libs "")
+  set(additional_libs ${LIBOMPTARGET_DEP_VEO_LIBRARIES}
+                      ${LIBOMPTARGET_DEP_VEOSINFO_LIBRARIES}
+                      ${additional_libs})
+
+  set(tmachine_name "ve")
+  set(tmachine_libname "ve")
+  set(tmachine_triple "ve-unknown-linux-unknown")
+  set(elf_machine_id 251)
+
+  include_directories(${LIBOMPTARGET_DEP_LIBELF_INCLUDE_DIR})
+  include_directories(${LIBOMPTARGET_DEP_VEO_INCLUDE_DIR})
+
+
+  # Define macro to be used as prefix of the runtime messages for this target.
+  add_definitions("-DTARGET_NAME=${tmachine_name}")
+
+  # Define macro with the ELF ID for this target.
+  add_definitions("-DTARGET_ELF_ID=${elf_machine_id}")
+
+  add_library("omptarget.rtl.${tmachine_libname}" SHARED
+    ${CMAKE_CURRENT_SOURCE_DIR}/src/rtl.cpp)
+
+  # Install plugin under the lib destination folder.
+  install(TARGETS "omptarget.rtl.${tmachine_libname}"
+    LIBRARY DESTINATION lib${OPENMP_LIBDIR_SUFFIX})
+
+  target_link_libraries(
+    "omptarget.rtl.${tmachine_libname}"
+    ${LIBOMPTARGET_DEP_LIBFFI_LIBRARIES}
+    ${LIBOMPTARGET_DEP_LIBELF_LIBRARIES}
+    ${additional_libs}
+    "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports -Wl,-z,defs")
+
+  # Report to the parent scope that we are building a plugin.
+  set(LIBOMPTARGET_SYSTEM_TARGETS
+    "${LIBOMPTARGET_SYSTEM_TARGETS} ${tmachine_triple}" PARENT_SCOPE)
+else()
+    libomptarget_say("Not building nec-aurora plugin: libveo or libveosinfo not found.")
+endif()

diff --git a/openmp/libomptarget/plugins/ve/src/rtl.cpp b/openmp/libomptarget/plugins/ve/src/rtl.cpp
new file mode 100644
index 000000000000..ec89932a76e1
--- /dev/null
+++ b/openmp/libomptarget/plugins/ve/src/rtl.cpp
@@ -0,0 +1,464 @@
+//===-RTLs/nec-aurora/src/rtl.cpp - Target RTLs Implementation - C++ -*-======//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// RTL for NEC Aurora TSUBASA machines
+//
+//===----------------------------------------------------------------------===//
+
+#include "omptargetplugin.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cerrno>
+#include <cstring>
+#include <list>
+#include <stdlib.h>
+#include <string>
+#include <sys/stat.h>
+#include <ve_offload.h>
+#include <vector>
+#include <veosinfo/veosinfo.h>
+
+#ifndef TARGET_ELF_ID
+#define TARGET_ELF_ID 0
+#endif
+
+#ifdef OMPTARGET_DEBUG
+static int DebugLevel = 0;
+
+#define GETNAME2(name) #name
+#define GETNAME(name) GETNAME2(name)
+#define DP(...)                                                                \
+  do {                                                                         \
+    if (DebugLevel > 0) {                                                      \
+      DEBUGP("Target " GETNAME(TARGET_NAME) " RTL", __VA_ARGS__);              \
+    }                                                                          \
+  } while (false)
+#else // OMPTARGET_DEBUG
+#define DP(...)                                                                \
+  {}
+#endif // OMPTARGET_DEBUG
+
+#include "../../common/elf_common.c"
+
+struct DynLibTy {
+  char *FileName;
+  uint64_t VeoLibHandle;
+};
+
+/// Keep entries table per device.
+struct FuncOrGblEntryTy {
+  __tgt_target_table Table;
+  std::vector<__tgt_offload_entry> Entries;
+};
+
+class RTLDeviceInfoTy {
+  std::vector<std::list<FuncOrGblEntryTy>> FuncOrGblEntry;
+
+public:
+  std::vector<struct veo_proc_handle *> ProcHandles;
+  std::vector<struct veo_thr_ctxt *> Contexts;
+  std::vector<uint64_t> LibraryHandles;
+  std::list<DynLibTy> DynLibs;
+  // Maps OpenMP device Ids to Ve nodeids
+  std::vector<int> NodeIds;
+
+  void buildOffloadTableFromHost(int32_t device_id, uint64_t VeoLibHandle,
+                                 __tgt_offload_entry *HostBegin,
+                                 __tgt_offload_entry *HostEnd) {
+    FuncOrGblEntry[device_id].emplace_back();
+    std::vector<__tgt_offload_entry> &T =
+        FuncOrGblEntry[device_id].back().Entries;
+    T.clear();
+    for (__tgt_offload_entry *i = HostBegin; i != HostEnd; ++i) {
+      char *SymbolName = i->name;
+      // we have not enough access to the target memory to conveniently parse
+      // the offload table there so we need to lookup every symbol with the host
+      // table
+      DP("Looking up symbol: %s\n", SymbolName);
+      uint64_t SymbolTargetAddr =
+          veo_get_sym(ProcHandles[device_id], VeoLibHandle, SymbolName);
+      __tgt_offload_entry Entry;
+
+      if (!SymbolTargetAddr) {
+        DP("Symbol %s not found in target image\n", SymbolName);
+        Entry = {NULL, NULL, 0, 0, 0};
+      } else {
+        DP("Found symbol %s successfully in target image (addr: %p)\n",
+           SymbolName, reinterpret_cast<void *>(SymbolTargetAddr));
+        Entry = { reinterpret_cast<void *>(SymbolTargetAddr),
+                  i->name,
+                  i->size,
+                  i->flags,
+                  0 };
+      }
+
+      T.push_back(Entry);
+    }
+
+    FuncOrGblEntry[device_id].back().Table.EntriesBegin = &T.front();
+    FuncOrGblEntry[device_id].back().Table.EntriesEnd = &T.back() + 1;
+  }
+
+  __tgt_target_table *getOffloadTable(int32_t device_id) {
+    return &FuncOrGblEntry[device_id].back().Table;
+  }
+
+  RTLDeviceInfoTy() {
+#ifdef OMPTARGET_DEBUG
+    if (char *envStr = getenv("LIBOMPTARGET_DEBUG")) {
+      DebugLevel = std::stoi(envStr);
+    }
+#endif // OMPTARGET_DEBUG
+
+    struct ve_nodeinfo node_info;
+    ve_node_info(&node_info);
+
+    // Build a predictable mapping between VE node ids and OpenMP device ids.
+    // This is necessary, because nodes can be missing or offline and (active)
+    // node ids are thus not consecutive. The entries in ve_nodeinfo may also
+    // not be in the order of their node ids.
+    for (int i = 0; i < node_info.total_node_count; ++i) {
+      if (node_info.status[i] == 0) {
+        NodeIds.push_back(node_info.nodeid[i]);
+      }
+    }
+
+    // Because the entries in ve_nodeinfo may not be in the order of their node
+    // ids, we sort NodeIds to get a predictable mapping.
+    std::sort(NodeIds.begin(), NodeIds.end());
+
+    int NumDevices = NodeIds.size();
+    DP("Found %i VE devices\n", NumDevices);
+    ProcHandles.resize(NumDevices, NULL);
+    Contexts.resize(NumDevices, NULL);
+    FuncOrGblEntry.resize(NumDevices);
+    LibraryHandles.resize(NumDevices);
+  }
+
+  ~RTLDeviceInfoTy() {
+    for (auto &ctx : Contexts) {
+      if (ctx != NULL) {
+        if (veo_context_close(ctx) != 0) {
+          DP("Failed to close VEO context.\n");
+        }
+      }
+    }
+
+    for (auto &hdl : ProcHandles) {
+      if (hdl != NULL) {
+        veo_proc_destroy(hdl);
+      }
+    }
+
+    for (auto &lib : DynLibs) {
+      if (lib.FileName) {
+        remove(lib.FileName);
+      }
+    }
+  }
+};
+
+static RTLDeviceInfoTy DeviceInfo;
+
+static int target_run_function_wait(uint32_t DeviceID, uint64_t FuncAddr,
+                                    struct veo_args *args, uint64_t *RetVal) {
+  DP("Running function with entry point %p\n",
+     reinterpret_cast<void *>(FuncAddr));
+  uint64_t RequestHandle =
+      veo_call_async(DeviceInfo.Contexts[DeviceID], FuncAddr, args);
+  if (RequestHandle == VEO_REQUEST_ID_INVALID) {
+    DP("Execution of entry point %p failed\n",
+       reinterpret_cast<void *>(FuncAddr));
+    return OFFLOAD_FAIL;
+  }
+
+  DP("Function at address %p called (VEO request ID: %" PRIu64 ")\n",
+     reinterpret_cast<void *>(FuncAddr), RequestHandle);
+
+  int ret = veo_call_wait_result(DeviceInfo.Contexts[DeviceID], RequestHandle,
+                                 RetVal);
+  if (ret != 0) {
+    DP("Waiting for entry point %p failed (Error code %d)\n",
+       reinterpret_cast<void *>(FuncAddr), ret);
+    return OFFLOAD_FAIL;
+  }
+  return OFFLOAD_SUCCESS;
+}
+
+
+// Return the number of available devices of the type supported by the
+// target RTL.
+int32_t __tgt_rtl_number_of_devices(void) { return DeviceInfo.NodeIds.size(); }
+
+// Return an integer different from zero if the provided device image can be
+// supported by the runtime. The functionality is similar to comparing the
+// result of __tgt__rtl__load__binary to NULL. However, this is meant to be a
+// lightweight query to determine if the RTL is suitable for an image without
+// having to load the library, which can be expensive.
+int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
+#if TARGET_ELF_ID < 1
+  return 0;
+#else
+  return elf_check_machine(Image, TARGET_ELF_ID);
+#endif
+}
+
+// Initialize the specified device. In case of success return 0; otherwise
+// return an error code.
+int32_t __tgt_rtl_init_device(int32_t ID) {
+  DP("Available VEO version: %i\n", veo_api_version());
+
+  // At the moment we do not really initialize (i.e. create a process or
+  // context on) the device here, but in "__tgt_rtl_load_binary".
+  // The reason for this is, that, when we create a process for a statically
+  // linked binary, the VEO api needs us to already supply the binary (but we
+  // can load a dynamically linked binary later, after we create the process).
+  // At this stage, we cannot check if we have a dynamically or statically
+  // linked binary so we defer process creation until we know.
+  return OFFLOAD_SUCCESS;
+}
+
+// Pass an executable image section described by image to the specified
+// device and prepare an address table of target entities. In case of error,
+// return NULL. Otherwise, return a pointer to the built address table.
+// Individual entries in the table may also be NULL, when the corresponding
+// offload region is not supported on the target device.
+__tgt_target_table *__tgt_rtl_load_binary(int32_t ID,
+                                          __tgt_device_image *Image) {
+  DP("Dev %d: load binary from " DPxMOD " image\n", ID,
+     DPxPTR(Image->ImageStart));
+
+  assert(ID >= 0 && "bad dev id");
+
+  size_t ImageSize = (size_t)Image->ImageEnd - (size_t)Image->ImageStart;
+  size_t NumEntries = (size_t)(Image->EntriesEnd - Image->EntriesBegin);
+  DP("Expecting to have %zd entries defined.\n", NumEntries);
+
+  // load dynamic library and get the entry points. We use the dl library
+  // to do the loading of the library, but we could do it directly to avoid the
+  // dump to the temporary file.
+  //
+  // 1) Create tmp file with the library contents.
+  // 2) Use dlopen to load the file and dlsym to retrieve the symbols.
+  char tmp_name[] = "/tmp/tmpfile_XXXXXX";
+  int tmp_fd = mkstemp(tmp_name);
+
+  if (tmp_fd == -1) {
+    return NULL;
+  }
+
+  FILE *ftmp = fdopen(tmp_fd, "wb");
+
+  if (!ftmp) {
+    DP("fdopen() for %s failed. Could not write target image\n", tmp_name);
+    return NULL;
+  }
+
+  fwrite(Image->ImageStart, ImageSize, 1, ftmp);
+
+  // at least for the static case we need to change the permissions
+  chmod(tmp_name, 0700);
+
+  DP("Wrote target image to %s. ImageSize=%zu\n", tmp_name, ImageSize);
+
+  fclose(ftmp);
+
+  // See comment in "__tgt_rtl_init_device"
+  bool is_dyn = true;
+  if (DeviceInfo.ProcHandles[ID] == NULL) {
+    struct veo_proc_handle *proc_handle;
+    is_dyn = elf_is_dynamic(Image);
+    // If we have a dynamically linked image, we create the process handle, then
+    // the thread, and then load the image.
+    // If we have a statically linked image, we need to create the process
+    // handle and load the image at the same time with veo_proc_create_static().
+    if (is_dyn) {
+      proc_handle = veo_proc_create(DeviceInfo.NodeIds[ID]);
+      if (!proc_handle) {
+        DP("veo_proc_create() failed for device %d\n", ID);
+        return NULL;
+      }
+    } else {
+      proc_handle = veo_proc_create_static(DeviceInfo.NodeIds[ID], tmp_name);
+      if (!proc_handle) {
+        DP("veo_proc_create_static() failed for device %d, image=%s\n", ID,
+           tmp_name);
+        return NULL;
+      }
+    }
+    DeviceInfo.ProcHandles[ID] = proc_handle;
+  }
+
+  if (DeviceInfo.Contexts[ID] == NULL) {
+    struct veo_thr_ctxt *ctx = veo_context_open(DeviceInfo.ProcHandles[ID]);
+
+    if (!ctx) {
+      DP("veo_context_open() failed: %s\n", std::strerror(errno));
+      return NULL;
+    }
+
+    DeviceInfo.Contexts[ID] = ctx;
+  }
+
+  DP("Aurora device successfully initialized with loaded binary: "
+     "proc_handle=%p, ctx=%p\n",
+     DeviceInfo.ProcHandles[ID], DeviceInfo.Contexts[ID]);
+
+  uint64_t LibHandle = 0UL;
+  if (is_dyn) {
+    LibHandle = veo_load_library(DeviceInfo.ProcHandles[ID], tmp_name);
+
+    if (!LibHandle) {
+      DP("veo_load_library() failed: LibHandle=%" PRIu64
+         " Name=%s. Set env VEORUN_BIN for static linked target code.\n",
+         LibHandle, tmp_name);
+      return NULL;
+    }
+
+    DP("Successfully loaded library dynamically\n");
+  } else {
+    DP("Symbol table is expected to have been created by "
+       "veo_create_proc_static()\n");
+  }
+
+  DynLibTy Lib = {tmp_name, LibHandle};
+  DeviceInfo.DynLibs.push_back(Lib);
+  DeviceInfo.LibraryHandles[ID] = LibHandle;
+
+  DeviceInfo.buildOffloadTableFromHost(ID, LibHandle, Image->EntriesBegin,
+                                       Image->EntriesEnd);
+
+  return DeviceInfo.getOffloadTable(ID);
+}
+
+// Allocate data on the particular target device, of the specified size.
+// HostPtr is a address of the host data the allocated target data
+// will be associated with (HostPtr may be NULL if it is not known at
+// allocation time, like for example it would be for target data that
+// is allocated by omp_target_alloc() API). Return address of the
+// allocated data on the target that will be used by libomptarget.so to
+// initialize the target data mapping structures. These addresses are
+// used to generate a table of target variables to pass to
+// __tgt_rtl_run_region(). The __tgt_rtl_data_alloc() returns NULL in
+// case an error occurred on the target device.
+void *__tgt_rtl_data_alloc(int32_t ID, int64_t Size, void *HostPtr) {
+  int ret;
+  uint64_t addr;
+
+  if (DeviceInfo.ProcHandles[ID] == NULL) {
+    struct veo_proc_handle *proc_handle;
+    proc_handle = veo_proc_create(DeviceInfo.NodeIds[ID]);
+    if (!proc_handle) {
+      DP("veo_proc_create() failed for device %d\n", ID);
+      return NULL;
+    }
+    DeviceInfo.ProcHandles[ID] = proc_handle;
+    DP("Aurora device successfully initialized: proc_handle=%p", proc_handle);
+  }
+
+  ret = veo_alloc_mem(DeviceInfo.ProcHandles[ID], &addr, Size);
+  DP("Allocate target memory: device=%d, target addr=%p, size=%" PRIu64 "\n",
+     ID, reinterpret_cast<void *>(addr), Size);
+  if (ret != 0) {
+    DP("veo_alloc_mem(%d, %p, %" PRIu64 ") failed with error code %d\n",
+       ID, reinterpret_cast<void *>(addr), Size, ret);
+    return NULL;
+  }
+
+  return reinterpret_cast<void *>(addr);
+}
+
+// Pass the data content to the target device using the target address.
+// In case of success, return zero. Otherwise, return an error code.
+int32_t __tgt_rtl_data_submit(int32_t ID, void *TargetPtr, void *HostPtr,
+                              int64_t Size) {
+  int ret = veo_write_mem(DeviceInfo.ProcHandles[ID], (uint64_t)TargetPtr,
+                          HostPtr, (size_t)Size);
+  if (ret != 0) {
+    DP("veo_write_mem() failed with error code %d\n", ret);
+    return OFFLOAD_FAIL;
+  }
+  return OFFLOAD_SUCCESS;
+}
+
+// Retrieve the data content from the target device using its address.
+// In case of success, return zero. Otherwise, return an error code.
+int32_t __tgt_rtl_data_retrieve(int32_t ID, void *HostPtr, void *TargetPtr,
+                                int64_t Size) {
+  int ret = veo_read_mem(DeviceInfo.ProcHandles[ID], HostPtr,
+                         (uint64_t)TargetPtr, Size);
+  if (ret != 0) {
+    DP("veo_read_mem() failed with error code %d\n", ret);
+    return OFFLOAD_FAIL;
+  }
+  return OFFLOAD_SUCCESS;
+}
+
+// De-allocate the data referenced by target ptr on the device. In case of
+// success, return zero. Otherwise, return an error code.
+int32_t __tgt_rtl_data_delete(int32_t ID, void *TargetPtr) {
+  int ret =  veo_free_mem(DeviceInfo.ProcHandles[ID], (uint64_t)TargetPtr);
+
+  if (ret != 0) {
+    DP("veo_free_mem() failed with error code %d\n", ret);
+    return OFFLOAD_FAIL;
+  }
+  return OFFLOAD_SUCCESS;
+}
+
+// Similar to __tgt_rtl_run_target_region, but additionally specify the
+// number of teams to be created and a number of threads in each team.
+int32_t __tgt_rtl_run_target_team_region(int32_t ID, void *Entry, void **Args,
+                                         ptrdiff_t *Offsets, int32_t NumArgs,
+                                         int32_t NumTeams, int32_t ThreadLimit,
+                                         uint64_t loop_tripcount) {
+  int ret;
+
+  // ignore team num and thread limit.
+  std::vector<void *> ptrs(NumArgs);
+
+  struct veo_args *TargetArgs;
+  TargetArgs = veo_args_alloc();
+
+  if (TargetArgs == NULL) {
+    DP("Could not allocate VEO args\n");
+    return OFFLOAD_FAIL;
+  }
+
+  for (int i = 0; i < NumArgs; ++i) {
+    ret = veo_args_set_u64(TargetArgs, i, (intptr_t)Args[i]);
+
+    if (ret != 0) {
+      DP("veo_args_set_u64() has returned %d for argnum=%d and value %p\n",
+         ret, i, Args[i]);
+      return OFFLOAD_FAIL;
+    }
+  }
+
+  uint64_t RetVal;
+  if (target_run_function_wait(ID, reinterpret_cast<uint64_t>(Entry),
+                               TargetArgs, &RetVal) != OFFLOAD_SUCCESS) {
+    veo_args_free(TargetArgs);
+    return OFFLOAD_FAIL;
+  }
+  veo_args_free(TargetArgs);
+  return OFFLOAD_SUCCESS;
+}
+
+// Transfer control to the offloaded entry Entry on the target device.
+// Args and Offsets are arrays of NumArgs size of target addresses and
+// offsets. An offset should be added to the target address before passing it
+// to the outlined function on device side. In case of success, return zero.
+// Otherwise, return an error code.
+int32_t __tgt_rtl_run_target_region(int32_t ID, void *Entry, void **Args,
+                                    ptrdiff_t *Offsets, int32_t NumArgs) {
+  return __tgt_rtl_run_target_team_region(ID, Entry, Args, Offsets, NumArgs, 1,
+                                          1, 0);
+}

diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp
index 1439f67e7c64..6ce4f88d4c49 100644
--- a/openmp/libomptarget/src/rtl.cpp
+++ b/openmp/libomptarget/src/rtl.cpp
@@ -23,6 +23,7 @@
 
 // List of all plugins that can support offloading.
 static const char *RTLNames[] = {
+    /* SX-Aurora VE target  */ "libomptarget.rtl.ve.so",
     /* PowerPC target */ "libomptarget.rtl.ppc64.so",
     /* x86_64 target  */ "libomptarget.rtl.x86_64.so",
     /* CUDA target    */ "libomptarget.rtl.cuda.so",


        


More information about the Openmp-commits mailing list