[Openmp-commits] [clang] [llvm] [openmp] [Libomptarget] Statically link all plugin runtimes (PR #87009)

Joseph Huber via Openmp-commits openmp-commits at lists.llvm.org
Fri Mar 29 06:17:22 PDT 2024


https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/87009

>From bb5f330cc3d5e0758825b25e3b8209fb7af6be79 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Wed, 27 Mar 2024 15:27:16 -0500
Subject: [PATCH 1/3] [Libomptarget] Rename `libomptarget.rtl.x86_64` to
 `libomptarget.rtl.host`

Summary:
All of these are functionally the same code, just compiled for separate
architectures. We currently do not expose a way to execute these on
separate architectures as the host plugin works using `dlopen` into the
same process, and therefore cannot possibly be an incompatible
architecture. (This could work with a remote plugin, but this is not
supported yet).

This patch simply renames all of these to the same thing so we no longer
need to check around for its varying definitions.
---
 .../plugins-nextgen/host/CMakeLists.txt       | 36 +++++++++----------
 openmp/libomptarget/src/CMakeLists.txt        |  5 +--
 2 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/openmp/libomptarget/plugins-nextgen/host/CMakeLists.txt b/openmp/libomptarget/plugins-nextgen/host/CMakeLists.txt
index ccbf7d033fd663..0954f8367654f6 100644
--- a/openmp/libomptarget/plugins-nextgen/host/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/host/CMakeLists.txt
@@ -14,36 +14,36 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le$")
 endif()
 
 # Create the library and add the default arguments.
-add_target_library(omptarget.rtl.${machine} ${machine})
+add_target_library(omptarget.rtl.host ${machine})
 
-target_sources(omptarget.rtl.${machine} PRIVATE src/rtl.cpp)
+target_sources(omptarget.rtl.host PRIVATE src/rtl.cpp)
 
 if(LIBOMPTARGET_DEP_LIBFFI_FOUND)
   libomptarget_say("Building ${machine} plugin linked with libffi")
   if(FFI_STATIC_LIBRARIES)
-    target_link_libraries(omptarget.rtl.${machine} PRIVATE FFI::ffi_static)
+    target_link_libraries(omptarget.rtl.host PRIVATE FFI::ffi_static)
   else()
-    target_link_libraries(omptarget.rtl.${machine} PRIVATE FFI::ffi)
+    target_link_libraries(omptarget.rtl.host PRIVATE FFI::ffi)
   endif()
 else()
   libomptarget_say("Building ${machine} plugin for dlopened libffi")
-  target_sources(omptarget.rtl.${machine} PRIVATE dynamic_ffi/ffi.cpp)
-  target_include_directories(omptarget.rtl.${machine} PRIVATE dynamic_ffi)
+  target_sources(omptarget.rtl.host PRIVATE dynamic_ffi/ffi.cpp)
+  target_include_directories(omptarget.rtl.host PRIVATE dynamic_ffi)
 endif()
 
 # Install plugin under the lib destination folder.
-install(TARGETS omptarget.rtl.${machine}
+install(TARGETS omptarget.rtl.host
         LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
-set_target_properties(omptarget.rtl.${machine} PROPERTIES
+set_target_properties(omptarget.rtl.host PROPERTIES
   INSTALL_RPATH "$ORIGIN" BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.."
   POSITION_INDEPENDENT_CODE ON
   CXX_VISIBILITY_PRESET protected)
 
-target_include_directories(omptarget.rtl.${machine} PRIVATE
+target_include_directories(omptarget.rtl.host PRIVATE
                            ${LIBOMPTARGET_INCLUDE_DIR})
 
 if(LIBOMPTARGET_DEP_LIBFFI_FOUND)
-  list(APPEND LIBOMPTARGET_TESTED_PLUGINS omptarget.rtl.${machine})
+  list(APPEND LIBOMPTARGET_TESTED_PLUGINS omptarget.rtl.host)
   set(LIBOMPTARGET_TESTED_PLUGINS
       "${LIBOMPTARGET_TESTED_PLUGINS}" PARENT_SCOPE)
 else()
@@ -53,29 +53,29 @@ endif()
 # Define the target specific triples and ELF machine values.
 if(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le$" OR
    CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64$")
-  target_compile_definitions(omptarget.rtl.${machine} PRIVATE TARGET_ELF_ID=EM_PPC64)
-  target_compile_definitions(omptarget.rtl.${machine} PRIVATE
+  target_compile_definitions(omptarget.rtl.host PRIVATE TARGET_ELF_ID=EM_PPC64)
+  target_compile_definitions(omptarget.rtl.host PRIVATE
       LIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE="powerpc64-ibm-linux-gnu")
   list(APPEND LIBOMPTARGET_SYSTEM_TARGETS 
        "powerpc64-ibm-linux-gnu" "powerpc64-ibm-linux-gnu-LTO")
   set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS}" PARENT_SCOPE)
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64$")
-  target_compile_definitions(omptarget.rtl.${machine} PRIVATE TARGET_ELF_ID=EM_X86_64)
-  target_compile_definitions(omptarget.rtl.${machine} PRIVATE
+  target_compile_definitions(omptarget.rtl.host PRIVATE TARGET_ELF_ID=EM_X86_64)
+  target_compile_definitions(omptarget.rtl.host PRIVATE
       LIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE="x86_64-pc-linux-gnu")
   list(APPEND LIBOMPTARGET_SYSTEM_TARGETS 
        "x86_64-pc-linux-gnu" "x86_64-pc-linux-gnu-LTO")
   set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS}" PARENT_SCOPE)
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64$")
-  target_compile_definitions(omptarget.rtl.${machine} PRIVATE TARGET_ELF_ID=EM_AARCH64)
-  target_compile_definitions(omptarget.rtl.${machine} PRIVATE
+  target_compile_definitions(omptarget.rtl.host PRIVATE TARGET_ELF_ID=EM_AARCH64)
+  target_compile_definitions(omptarget.rtl.host PRIVATE
       LIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE="aarch64-unknown-linux-gnu")
   list(APPEND LIBOMPTARGET_SYSTEM_TARGETS 
        "aarch64-unknown-linux-gnu" "aarch64-unknown-linux-gnu-LTO")
   set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS}" PARENT_SCOPE)
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x$")
-  target_compile_definitions(omptarget.rtl.${machine} PRIVATE TARGET_ELF_ID=EM_S390)
-  target_compile_definitions(omptarget.rtl.${machine} PRIVATE
+  target_compile_definitions(omptarget.rtl.host PRIVATE TARGET_ELF_ID=EM_S390)
+  target_compile_definitions(omptarget.rtl.host PRIVATE
       LIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE="s390x-ibm-linux-gnu")
   list(APPEND LIBOMPTARGET_SYSTEM_TARGETS 
        "s390x-ibm-linux-gnu" "s390x-ibm-linux-gnu-LTO")
diff --git a/openmp/libomptarget/src/CMakeLists.txt b/openmp/libomptarget/src/CMakeLists.txt
index d0971bd4ef079e..9c6dbd0e93c8b4 100644
--- a/openmp/libomptarget/src/CMakeLists.txt
+++ b/openmp/libomptarget/src/CMakeLists.txt
@@ -68,12 +68,9 @@ endmacro()
 set(LIBOMPTARGET_PLUGINS_TO_LOAD "" CACHE STRING
   "Comma separated list of plugin names to look for at runtime")
 if (NOT LIBOMPTARGET_PLUGINS_TO_LOAD)
-	check_plugin_target(ppc64)
-	check_plugin_target(x86_64)
 	check_plugin_target(cuda)
-	check_plugin_target(aarch64)
 	check_plugin_target(amdgpu)
-	check_plugin_target(s390x)
+	check_plugin_target(host)
 endif()
 
 list(TRANSFORM LIBOMPTARGET_PLUGINS_TO_LOAD PREPEND "\"libomptarget.rtl.")

>From 586effaa86cb4b6568c44fe7df5175110d353f3c Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Wed, 27 Mar 2024 16:31:57 -0500
Subject: [PATCH 2/3] [Libomptarget] Rework interface for enabling plugins

Summary:
Previously we would build all of the plugins by default and then only
load some using the `LIBOMPTARGET_PLUGINS_TO_LOAD` variable. This patch
renames this to `LIBOMPTARGET_PLUGINS_TO_BUILD` and uses it to control
whether or not each plugin is included in the CMake build at all.

Additionally this patch creates a new `Targets.def` file that allows us
to enumerate all of the enabled plugins. This is somewhat different from
the old method, and it's done this way for future use that will need to
be shared. This follows the same method that LLVM uses for its targets,
however it does require adding an extra include path.

Depends on https://github.com/llvm/llvm-project/pull/86868
---
 openmp/libomptarget/CMakeLists.txt            | 20 ++++++++++++++
 .../include/Shared/Targets.def.in             | 20 ++++++++++++++
 .../plugins-nextgen/CMakeLists.txt            |  9 ++++---
 .../plugins-nextgen/common/CMakeLists.txt     |  1 +
 openmp/libomptarget/src/CMakeLists.txt        | 19 +++----------
 openmp/libomptarget/src/PluginManager.cpp     | 27 +++++++++----------
 6 files changed, 64 insertions(+), 32 deletions(-)
 create mode 100644 openmp/libomptarget/include/Shared/Targets.def.in

diff --git a/openmp/libomptarget/CMakeLists.txt b/openmp/libomptarget/CMakeLists.txt
index 531198fae01699..cdc79a855c57a3 100644
--- a/openmp/libomptarget/CMakeLists.txt
+++ b/openmp/libomptarget/CMakeLists.txt
@@ -41,6 +41,25 @@ if (NOT LIBOMPTARGET_LLVM_INCLUDE_DIRS)
   message(FATAL_ERROR "Missing definition for LIBOMPTARGET_LLVM_INCLUDE_DIRS")
 endif()
 
+set(LIBOMPTARGET_ALL_PLUGIN_TARGETS amdgpu cuda host)
+set(LIBOMPTARGET_PLUGINS_TO_BUILD "all" CACHE STRING
+    "Semicolon-separated list of plugins to use, or \"all\".")
+
+if(LIBOMPTARGET_PLUGINS_TO_BUILD STREQUAL "all")
+  set(LIBOMPTARGET_PLUGINS_TO_BUILD ${LIBOMPTARGET_ALL_PLUGIN_TARGETS})
+endif()
+
+set(LIBOMPTARGET_ENUM_PLUGIN_TARGETS "")
+foreach(plugin IN LISTS LIBOMPTARGET_PLUGINS_TO_BUILD)
+  set(LIBOMPTARGET_ENUM_PLUGIN_TARGETS
+      "${LIBOMPTARGET_ENUM_PLUGIN_TARGETS}PLUGIN_TARGET(${plugin})\n")
+endforeach()
+string(STRIP ${LIBOMPTARGET_ENUM_PLUGIN_TARGETS} LIBOMPTARGET_ENUM_PLUGIN_TARGETS)
+configure_file(
+  ${CMAKE_CURRENT_SOURCE_DIR}/include/Shared/Targets.def.in
+  ${CMAKE_CURRENT_BINARY_DIR}/include/Shared/Targets.def
+)
+
 include_directories(${LIBOMPTARGET_LLVM_INCLUDE_DIRS})
 
 # This is a list of all the targets that are supported/tested right now.
@@ -126,6 +145,7 @@ set(LIBOMPTARGET_GPU_LIBC_SUPPORT ${LLVM_LIBC_GPU_BUILD} CACHE BOOL
 pythonize_bool(LIBOMPTARGET_GPU_LIBC_SUPPORT)
 
 set(LIBOMPTARGET_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include)
+set(LIBOMPTARGET_BINARY_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/include)
 message(STATUS "OpenMP tools dir in libomptarget: ${LIBOMP_OMP_TOOLS_INCLUDE_DIR}")
 include_directories(${LIBOMP_OMP_TOOLS_INCLUDE_DIR})
 
diff --git a/openmp/libomptarget/include/Shared/Targets.def.in b/openmp/libomptarget/include/Shared/Targets.def.in
new file mode 100644
index 00000000000000..f34b523b4542bd
--- /dev/null
+++ b/openmp/libomptarget/include/Shared/Targets.def.in
@@ -0,0 +1,20 @@
+//===-- Shared/Targets.def - Target plugin enumerator -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Enumerates over all of the supported target plugins that are available to
+// the offloading library.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PLUGIN_TARGET
+#  error Please define the macro PLUGIN_TARGET(TargetName)
+#endif
+
+ at LIBOMPTARGET_ENUM_PLUGIN_TARGETS@
+
+#undef PLUGIN_TARGET
diff --git a/openmp/libomptarget/plugins-nextgen/CMakeLists.txt b/openmp/libomptarget/plugins-nextgen/CMakeLists.txt
index dbd82ac945171e..df625e97c7ebf5 100644
--- a/openmp/libomptarget/plugins-nextgen/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/CMakeLists.txt
@@ -69,9 +69,12 @@ function(add_target_library target_name lib_name)
   set_target_properties(${target_name} PROPERTIES CXX_VISIBILITY_PRESET protected)
 endfunction()
 
-add_subdirectory(amdgpu)
-add_subdirectory(cuda)
-add_subdirectory(host)
+foreach(plugin IN LISTS LIBOMPTARGET_PLUGINS_TO_BUILD)
+  if(NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${plugin})
+    message(FATAL_ERROR "Unknown plugin target '${plugin}'")
+  endif()
+  add_subdirectory(${plugin})
+endforeach()
 
 # Make sure the parent scope can see the plugins that will be created.
 set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS}" PARENT_SCOPE)
diff --git a/openmp/libomptarget/plugins-nextgen/common/CMakeLists.txt b/openmp/libomptarget/plugins-nextgen/common/CMakeLists.txt
index a7350e662a7c9a..acf0af63f0508c 100644
--- a/openmp/libomptarget/plugins-nextgen/common/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/common/CMakeLists.txt
@@ -62,6 +62,7 @@ target_link_options(PluginCommon PUBLIC ${offload_link_flags})
 target_include_directories(PluginCommon PUBLIC 
   ${CMAKE_CURRENT_SOURCE_DIR}/include
   ${LIBOMPTARGET_LLVM_INCLUDE_DIRS}
+  ${LIBOMPTARGET_BINARY_INCLUDE_DIR}
   ${LIBOMPTARGET_INCLUDE_DIR}
 )
 
diff --git a/openmp/libomptarget/src/CMakeLists.txt b/openmp/libomptarget/src/CMakeLists.txt
index 9c6dbd0e93c8b4..b833c5199fab04 100644
--- a/openmp/libomptarget/src/CMakeLists.txt
+++ b/openmp/libomptarget/src/CMakeLists.txt
@@ -31,6 +31,7 @@ add_llvm_library(omptarget
 
   ADDITIONAL_HEADER_DIRS
   ${LIBOMPTARGET_INCLUDE_DIR}
+  ${LIBOMPTARGET_BINARY_INCLUDE_DIR}
 
   LINK_COMPONENTS
   Support
@@ -43,7 +44,9 @@ add_llvm_library(omptarget
   NO_INSTALL_RPATH
   BUILDTREE_ONLY
 )
-target_include_directories(omptarget PRIVATE ${LIBOMPTARGET_INCLUDE_DIR})
+target_include_directories(omptarget PRIVATE 
+  ${LIBOMPTARGET_INCLUDE_DIR} ${LIBOMPTARGET_BINARY_INCLUDE_DIR}
+)
 
 if (LIBOMP_HAVE_VERSION_SCRIPT_FLAG)
   target_link_libraries(omptarget PRIVATE
@@ -59,20 +62,6 @@ target_compile_definitions(omptarget PRIVATE
 target_compile_options(omptarget PUBLIC ${offload_compile_flags})
 target_link_options(omptarget PUBLIC ${offload_link_flags})
 
-macro(check_plugin_target target)
-if (TARGET omptarget.rtl.${target})
-	list(APPEND LIBOMPTARGET_PLUGINS_TO_LOAD ${target})
-endif()
-endmacro()
-
-set(LIBOMPTARGET_PLUGINS_TO_LOAD "" CACHE STRING
-  "Comma separated list of plugin names to look for at runtime")
-if (NOT LIBOMPTARGET_PLUGINS_TO_LOAD)
-	check_plugin_target(cuda)
-	check_plugin_target(amdgpu)
-	check_plugin_target(host)
-endif()
-
 list(TRANSFORM LIBOMPTARGET_PLUGINS_TO_LOAD PREPEND "\"libomptarget.rtl.")
 list(TRANSFORM LIBOMPTARGET_PLUGINS_TO_LOAD APPEND "\"")
 list(JOIN LIBOMPTARGET_PLUGINS_TO_LOAD "," ENABLED_OFFLOAD_PLUGINS)
diff --git a/openmp/libomptarget/src/PluginManager.cpp b/openmp/libomptarget/src/PluginManager.cpp
index 792cae3e3dd583..dbb556c179e58c 100644
--- a/openmp/libomptarget/src/PluginManager.cpp
+++ b/openmp/libomptarget/src/PluginManager.cpp
@@ -23,9 +23,6 @@ using namespace llvm::sys;
 
 PluginManager *PM = nullptr;
 
-// List of all plugins that can support offloading.
-static const char *RTLNames[] = {ENABLED_OFFLOAD_PLUGINS};
-
 Expected<std::unique_ptr<PluginAdaptorTy>>
 PluginAdaptorTy::create(const std::string &Name) {
   DP("Attempting to load library '%s'...\n", Name.c_str());
@@ -95,17 +92,19 @@ void PluginManager::init() {
 
   // Attempt to open all the plugins and, if they exist, check if the interface
   // is correct and if they are supporting any devices.
-  for (const char *Name : RTLNames) {
-    auto PluginAdaptorOrErr =
-        PluginAdaptorTy::create(std::string(Name) + ".so");
-    if (!PluginAdaptorOrErr) {
-      [[maybe_unused]] std::string InfoMsg =
-          toString(PluginAdaptorOrErr.takeError());
-      DP("%s", InfoMsg.c_str());
-    } else {
-      PluginAdaptors.push_back(std::move(*PluginAdaptorOrErr));
-    }
-  }
+#define PLUGIN_TARGET(Name)                                                    \
+  do {                                                                         \
+    auto PluginAdaptorOrErr =                                                  \
+        PluginAdaptorTy::create("libomptarget.rtl." #Name ".so");              \
+    if (!PluginAdaptorOrErr) {                                                 \
+      [[maybe_unused]] std::string InfoMsg =                                   \
+          toString(PluginAdaptorOrErr.takeError());                            \
+      DP("%s", InfoMsg.c_str());                                               \
+    } else {                                                                   \
+      PluginAdaptors.push_back(std::move(*PluginAdaptorOrErr));                \
+    }                                                                          \
+  } while (false);
+#include "Shared/Targets.def"
 
   DP("RTLs loaded!\n");
 }

>From e720a4c1565ef23e3885606c89196c49be49676b Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Thu, 28 Mar 2024 16:18:19 -0500
Subject: [PATCH 3/3] [Libomptarget] Statically link all plugin runtimes

Summary:
This patch overhauls the `libomptarget` and plugin interface. Currently,
we define a C API and compile each plugin as a separate shared library.
Then, `libomptarget` loads these API functions and forwards its internal
calls to them. This was originally designed to allow multiple
implementations of a library to be live. However, since then no one has
used this functionality and it prevents us from using much nicer
interfaces. If the old behavior is desired it should instead be
implemented as a separate plugin.

This patch replaces the `PluginAdaptorTy` interface with the
`GenericPluginTy` that is used by the plugins. Each plugin exports a
`createPlugin_<name>` function that is used to get the specific
implementation. This code is now shared with `libomptarget`.

There are some notable improvements to this.
1. Massively improved lifetimes of live runtime objects
2. The plugins can use a C++ interface
3. Global state does not need to be duplicated for each plugin +
   libomptarget
4. Easier to use and add features and improve error handling
5. Less function call overhead / Improved LTO performance.

Additional changes in this patch are related to contending with the
fact that state is now shared. Initialization and deinitialization are
now handled correctly and in phase with the underlying runtime, allowing
us to actually know when something is getting deallocated.

Depends on https://github.com/llvm/llvm-project/pull/86971 https://github.com/llvm/llvm-project/pull/86875 https://github.com/llvm/llvm-project/pull/86868
---
 clang/test/Driver/linker-wrapper-image.c      |   2 +-
 .../Frontend/Offloading/OffloadWrapper.cpp    |   7 +-
 openmp/libomptarget/include/PluginManager.h   |  61 ++---
 .../libomptarget/include/Shared/PluginAPI.h   | 232 ------------------
 .../libomptarget/include/Shared/PluginAPI.inc |  51 ----
 openmp/libomptarget/include/device.h          |   8 +-
 .../plugins-nextgen/CMakeLists.txt            |  19 +-
 .../plugins-nextgen/amdgpu/CMakeLists.txt     |   5 -
 .../plugins-nextgen/amdgpu/src/rtl.cpp        |  14 +-
 .../plugins-nextgen/common/CMakeLists.txt     |   4 +-
 .../common/include/PluginInterface.h          |  94 +------
 .../common/include/Utils/ELF.h                |   2 -
 .../plugins-nextgen/common/src/JIT.cpp        |  40 ++-
 .../common/src/PluginInterface.cpp            | 205 ----------------
 .../plugins-nextgen/cuda/CMakeLists.txt       |   5 -
 .../plugins-nextgen/cuda/src/rtl.cpp          |  14 +-
 .../plugins-nextgen/host/CMakeLists.txt       |   8 -
 .../plugins-nextgen/host/src/rtl.cpp          |  14 +-
 openmp/libomptarget/src/CMakeLists.txt        |   4 +
 openmp/libomptarget/src/OffloadRTL.cpp        |   1 +
 openmp/libomptarget/src/OpenMP/InteropAPI.cpp |   4 +-
 openmp/libomptarget/src/PluginManager.cpp     | 129 ++++------
 openmp/libomptarget/src/device.cpp            |   3 +-
 openmp/libomptarget/src/interface.cpp         |   2 +-
 .../kernelreplay/llvm-omp-kernel-replay.cpp   |   2 -
 .../unittests/Plugins/NextgenPluginsTest.cpp  |   1 -
 26 files changed, 127 insertions(+), 804 deletions(-)
 delete mode 100644 openmp/libomptarget/include/Shared/PluginAPI.h
 delete mode 100644 openmp/libomptarget/include/Shared/PluginAPI.inc

diff --git a/clang/test/Driver/linker-wrapper-image.c b/clang/test/Driver/linker-wrapper-image.c
index d01445e3aed04e..5d5d62805e174d 100644
--- a/clang/test/Driver/linker-wrapper-image.c
+++ b/clang/test/Driver/linker-wrapper-image.c
@@ -30,8 +30,8 @@
 
 //      OPENMP: define internal void @.omp_offloading.descriptor_reg() section ".text.startup" {
 // OPENMP-NEXT: entry:
-// OPENMP-NEXT:   %0 = call i32 @atexit(ptr @.omp_offloading.descriptor_unreg)
 // OPENMP-NEXT:   call void @__tgt_register_lib(ptr @.omp_offloading.descriptor)
+// OPENMP-NEXT:   %0 = call i32 @atexit(ptr @.omp_offloading.descriptor_unreg)
 // OPENMP-NEXT:   ret void
 // OPENMP-NEXT: }
 
diff --git a/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp b/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp
index 7241d15ed1c670..8b6f9ea1f4cca3 100644
--- a/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp
+++ b/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp
@@ -232,12 +232,13 @@ void createRegisterFunction(Module &M, GlobalVariable *BinDesc,
   // Construct function body
   IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
 
+  Builder.CreateCall(RegFuncC, BinDesc);
+
   // Register the destructors with 'atexit'. This is expected by the CUDA
   // runtime and ensures that we clean up before dynamic objects are destroyed.
-  // This needs to be done before the runtime is called and registers its own.
+  // This needs to be done after plugin initialization to ensure that it is
+  // called before the plugin runtime is destroyed.
   Builder.CreateCall(AtExit, UnregFunc);
-
-  Builder.CreateCall(RegFuncC, BinDesc);
   Builder.CreateRetVoid();
 
   // Add this function to constructors.
diff --git a/openmp/libomptarget/include/PluginManager.h b/openmp/libomptarget/include/PluginManager.h
index eece7525e25e72..e49a4b24cab57b 100644
--- a/openmp/libomptarget/include/PluginManager.h
+++ b/openmp/libomptarget/include/PluginManager.h
@@ -16,7 +16,6 @@
 #include "DeviceImage.h"
 #include "ExclusiveAccess.h"
 #include "Shared/APITypes.h"
-#include "Shared/PluginAPI.h"
 #include "Shared/Requirements.h"
 
 #include "device.h"
@@ -34,38 +33,10 @@
 #include <mutex>
 #include <string>
 
-struct PluginManager;
-
-/// Plugin adaptors should be created via `PluginAdaptorTy::create` which will
-/// invoke the constructor and call `PluginAdaptorTy::init`. Eventual errors are
-/// reported back to the caller, otherwise a valid and initialized adaptor is
-/// returned.
-struct PluginAdaptorTy {
-  /// Try to create a plugin adaptor from a filename.
-  static llvm::Expected<std::unique_ptr<PluginAdaptorTy>>
-  create(const std::string &Name);
-
-  /// Name of the shared object file representing the plugin.
-  std::string Name;
-
-  /// Access to the shared object file representing the plugin.
-  std::unique_ptr<llvm::sys::DynamicLibrary> LibraryHandler;
-
-#define PLUGIN_API_HANDLE(NAME)                                                \
-  using NAME##_ty = decltype(__tgt_rtl_##NAME);                                \
-  NAME##_ty *NAME = nullptr;
+#include "PluginInterface.h"
+using GenericPluginTy = llvm::omp::target::plugin::GenericPluginTy;
 
-#include "Shared/PluginAPI.inc"
-#undef PLUGIN_API_HANDLE
-
-  /// Create a plugin adaptor for filename \p Name with a dynamic library \p DL.
-  PluginAdaptorTy(const std::string &Name,
-                  std::unique_ptr<llvm::sys::DynamicLibrary> DL);
-
-  /// Initialize the plugin adaptor, this can fail in which case the adaptor is
-  /// useless.
-  llvm::Error init();
-};
+struct PluginManager;
 
 /// Struct for the data required to handle plugins
 struct PluginManager {
@@ -80,6 +51,8 @@ struct PluginManager {
 
   void init();
 
+  void deinit();
+
   // Register a shared library with all (compatible) RTLs.
   void registerLib(__tgt_bin_desc *Desc);
 
@@ -92,10 +65,10 @@ struct PluginManager {
         std::make_unique<DeviceImageTy>(TgtBinDesc, TgtDeviceImage));
   }
 
-  /// Initialize as many devices as possible for this plugin adaptor. Devices
-  /// that fail to initialize are ignored. Returns the offset the devices were
-  /// registered at.
-  void initDevices(PluginAdaptorTy &RTL);
+  /// Initialize as many devices as possible for this plugin. Devices  that fail
+  /// to initialize are ignored. Returns the offset the devices were registered
+  /// at.
+  void initDevices(GenericPluginTy &RTL);
 
   /// Return the device presented to the user as device \p DeviceNo if it is
   /// initialized and ready. Otherwise return an error explaining the problem.
@@ -151,8 +124,8 @@ struct PluginManager {
   // Initialize all plugins.
   void initAllPlugins();
 
-  /// Iterator range for all plugin adaptors (in use or not, but always valid).
-  auto pluginAdaptors() { return llvm::make_pointee_range(PluginAdaptors); }
+  /// Iterator range for all plugins (in use or not, but always valid).
+  auto plugins() { return llvm::make_pointee_range(Plugins); }
 
   /// Return the user provided requirements.
   int64_t getRequirements() const { return Requirements.getRequirements(); }
@@ -164,14 +137,14 @@ struct PluginManager {
   bool RTLsLoaded = false;
   llvm::SmallVector<__tgt_bin_desc *> DelayedBinDesc;
 
-  // List of all plugin adaptors, in use or not.
-  llvm::SmallVector<std::unique_ptr<PluginAdaptorTy>> PluginAdaptors;
+  // List of all plugins, in use or not.
+  llvm::SmallVector<std::unique_ptr<GenericPluginTy>> Plugins;
 
-  // Mapping of plugin adaptors to offsets in the device table.
-  llvm::DenseMap<const PluginAdaptorTy *, int32_t> DeviceOffsets;
+  // Mapping of plugins to offsets in the device table.
+  llvm::DenseMap<const GenericPluginTy *, int32_t> DeviceOffsets;
 
-  // Mapping of plugin adaptors to the number of used devices.
-  llvm::DenseMap<const PluginAdaptorTy *, int32_t> DeviceUsed;
+  // Mapping of plugins to the number of used devices.
+  llvm::DenseMap<const GenericPluginTy *, int32_t> DeviceUsed;
 
   // Set of all device images currently in use.
   llvm::DenseSet<const __tgt_device_image *> UsedImages;
diff --git a/openmp/libomptarget/include/Shared/PluginAPI.h b/openmp/libomptarget/include/Shared/PluginAPI.h
deleted file mode 100644
index ecf669c774f142..00000000000000
--- a/openmp/libomptarget/include/Shared/PluginAPI.h
+++ /dev/null
@@ -1,232 +0,0 @@
-//===-- Shared/PluginAPI.h - Target independent plugin API ------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines an interface between target independent OpenMP offload
-// runtime library libomptarget and target dependent plugin.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef OMPTARGET_SHARED_PLUGIN_API_H
-#define OMPTARGET_SHARED_PLUGIN_API_H
-
-#include <cstddef>
-#include <cstdint>
-
-#include "Shared/APITypes.h"
-
-extern "C" {
-
-// First method called on the plugin
-int32_t __tgt_rtl_init_plugin();
-
-// Return the number of available devices of the type supported by the
-// target RTL.
-int32_t __tgt_rtl_number_of_devices(void);
-
-// Return an integer different from zero if the provided device image can be
-// supported by the runtime. The functionality is similar to comparing the
-// result of __tgt__rtl__load__binary to NULL. However, this is meant to be a
-// lightweight query to determine if the RTL is suitable for an image without
-// having to load the library, which can be expensive.
-int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image);
-
-// Return an integer other than zero if the data can be exchaned from SrcDevId
-// to DstDevId. If it is data exchangable, the device plugin should provide
-// function to move data from source device to destination device directly.
-int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDevId, int32_t DstDevId);
-
-// Initialize the requires flags for the device.
-int64_t __tgt_rtl_init_requires(int64_t RequiresFlags);
-
-// Initialize the specified device. In case of success return 0; otherwise
-// return an error code.
-int32_t __tgt_rtl_init_device(int32_t ID);
-
-// Pass an executable image section described by image to the specified
-// device and prepare an address table of target entities. In case of error,
-// return NULL. Otherwise, return a pointer to the built address table.
-// Individual entries in the table may also be NULL, when the corresponding
-// offload region is not supported on the target device.
-int32_t __tgt_rtl_load_binary(int32_t ID, __tgt_device_image *Image,
-                              __tgt_device_binary *Binary);
-
-// Look up the device address of the named symbol in the given binary. Returns
-// non-zero on failure.
-int32_t __tgt_rtl_get_global(__tgt_device_binary Binary, uint64_t Size,
-                             const char *Name, void **DevicePtr);
-
-// Look up the device address of the named kernel in the given binary. Returns
-// non-zero on failure.
-int32_t __tgt_rtl_get_function(__tgt_device_binary Binary, const char *Name,
-                               void **DevicePtr);
-
-// Allocate data on the particular target device, of the specified size.
-// HostPtr is a address of the host data the allocated target data
-// will be associated with (HostPtr may be NULL if it is not known at
-// allocation time, like for example it would be for target data that
-// is allocated by omp_target_alloc() API). Return address of the
-// allocated data on the target that will be used by libomptarget.so to
-// initialize the target data mapping structures. These addresses are
-// used to generate a table of target variables to pass to
-// __tgt_rtl_run_region(). The __tgt_rtl_data_alloc() returns NULL in
-// case an error occurred on the target device. Kind dictates what allocator
-// to use (e.g. shared, host, device).
-void *__tgt_rtl_data_alloc(int32_t ID, int64_t Size, void *HostPtr,
-                           int32_t Kind);
-
-// Pass the data content to the target device using the target address. In case
-// of success, return zero. Otherwise, return an error code.
-int32_t __tgt_rtl_data_submit(int32_t ID, void *TargetPtr, void *HostPtr,
-                              int64_t Size);
-
-int32_t __tgt_rtl_data_submit_async(int32_t ID, void *TargetPtr, void *HostPtr,
-                                    int64_t Size, __tgt_async_info *AsyncInfo);
-
-// Retrieve the data content from the target device using its address. In case
-// of success, return zero. Otherwise, return an error code.
-int32_t __tgt_rtl_data_retrieve(int32_t ID, void *HostPtr, void *TargetPtr,
-                                int64_t Size);
-
-// Asynchronous version of __tgt_rtl_data_retrieve
-int32_t __tgt_rtl_data_retrieve_async(int32_t ID, void *HostPtr,
-                                      void *TargetPtr, int64_t Size,
-                                      __tgt_async_info *AsyncInfo);
-
-// Copy the data content from one target device to another target device using
-// its address. This operation does not need to copy data back to host and then
-// from host to another device. In case of success, return zero. Otherwise,
-// return an error code.
-int32_t __tgt_rtl_data_exchange(int32_t SrcID, void *SrcPtr, int32_t DstID,
-                                void *DstPtr, int64_t Size);
-
-// Asynchronous version of __tgt_rtl_data_exchange
-int32_t __tgt_rtl_data_exchange_async(int32_t SrcID, void *SrcPtr,
-                                      int32_t DesID, void *DstPtr, int64_t Size,
-                                      __tgt_async_info *AsyncInfo);
-
-// De-allocate the data referenced by target ptr on the device. In case of
-// success, return zero. Otherwise, return an error code. Kind dictates what
-// allocator to use (e.g. shared, host, device).
-int32_t __tgt_rtl_data_delete(int32_t ID, void *TargetPtr, int32_t Kind);
-
-// Transfer control to the offloaded entry Entry on the target device.
-// Args and Offsets are arrays of NumArgs size of target addresses and
-// offsets. An offset should be added to the target address before passing it
-// to the outlined function on device side. If AsyncInfo is nullptr, it is
-// synchronous; otherwise it is asynchronous. However, AsyncInfo may be
-// ignored on some platforms, like x86_64. In that case, it is synchronous. In
-// case of success, return zero. Otherwise, return an error code.
-int32_t __tgt_rtl_run_target_region(int32_t ID, void *Entry, void **Args,
-                                    ptrdiff_t *Offsets, int32_t NumArgs);
-
-// Asynchronous version of __tgt_rtl_run_target_region
-int32_t __tgt_rtl_run_target_region_async(int32_t ID, void *Entry, void **Args,
-                                          ptrdiff_t *Offsets, int32_t NumArgs,
-                                          __tgt_async_info *AsyncInfo);
-
-// Similar to __tgt_rtl_run_target_region, but additionally specify the
-// number of teams to be created and a number of threads in each team. If
-// AsyncInfo is nullptr, it is synchronous; otherwise it is asynchronous.
-// However, AsyncInfo may be ignored on some platforms, like x86_64. In that
-// case, it is synchronous.
-int32_t __tgt_rtl_run_target_team_region(int32_t ID, void *Entry, void **Args,
-                                         ptrdiff_t *Offsets, int32_t NumArgs,
-                                         int32_t NumTeams, int32_t ThreadLimit,
-                                         uint64_t LoopTripcount);
-
-// Asynchronous version of __tgt_rtl_run_target_team_region
-int32_t __tgt_rtl_run_target_team_region_async(
-    int32_t ID, void *Entry, void **Args, ptrdiff_t *Offsets, int32_t NumArgs,
-    int32_t NumTeams, int32_t ThreadLimit, uint64_t LoopTripcount,
-    __tgt_async_info *AsyncInfo);
-
-// Device synchronization. In case of success, return zero. Otherwise, return an
-// error code.
-int32_t __tgt_rtl_synchronize(int32_t ID, __tgt_async_info *AsyncInfo);
-
-// Queries for the completion of asynchronous operations. Instead of blocking
-// the calling thread as __tgt_rtl_synchronize, the progress of the operations
-// stored in AsyncInfo->Queue is queried in a non-blocking manner, partially
-// advancing their execution. If all operations are completed, AsyncInfo->Queue
-// is set to nullptr. If there are still pending operations, AsyncInfo->Queue is
-// kept as a valid queue. In any case of success (i.e., successful query
-// with/without completing all operations), return zero. Otherwise, return an
-// error code.
-int32_t __tgt_rtl_query_async(int32_t ID, __tgt_async_info *AsyncInfo);
-
-// Set plugin's internal information flag externally.
-void __tgt_rtl_set_info_flag(uint32_t);
-
-// Print the device information
-void __tgt_rtl_print_device_info(int32_t ID);
-
-// Event related interfaces. It is expected to use the interfaces in the
-// following way:
-// 1) Create an event on the target device (__tgt_rtl_create_event).
-// 2) Record the event based on the status of \p AsyncInfo->Queue at the moment
-// of function call to __tgt_rtl_record_event. An event becomes "meaningful"
-// once it is recorded, such that others can depend on it.
-// 3) Call __tgt_rtl_wait_event to set dependence on the event. Whether the
-// operation is blocking or non-blocking depends on the target. It is expected
-// to be non-blocking, just set dependence and return.
-// 4) Call __tgt_rtl_sync_event to sync the event. It is expected to block the
-// thread calling the function.
-// 5) Destroy the event (__tgt_rtl_destroy_event).
-// {
-int32_t __tgt_rtl_create_event(int32_t ID, void **Event);
-
-int32_t __tgt_rtl_record_event(int32_t ID, void *Event,
-                               __tgt_async_info *AsyncInfo);
-
-int32_t __tgt_rtl_wait_event(int32_t ID, void *Event,
-                             __tgt_async_info *AsyncInfo);
-
-int32_t __tgt_rtl_sync_event(int32_t ID, void *Event);
-
-int32_t __tgt_rtl_destroy_event(int32_t ID, void *Event);
-// }
-
-int32_t __tgt_rtl_init_async_info(int32_t ID, __tgt_async_info **AsyncInfoPtr);
-int32_t __tgt_rtl_init_device_info(int32_t ID, __tgt_device_info *DeviceInfoPtr,
-                                   const char **ErrStr);
-
-// lock/pin host memory
-int32_t __tgt_rtl_data_lock(int32_t ID, void *HstPtr, int64_t Size,
-                            void **LockedPtr);
-
-// unlock/unpin host memory
-int32_t __tgt_rtl_data_unlock(int32_t ID, void *HstPtr);
-
-// Notify the plugin about a new mapping starting at the host address \p HstPtr
-// and \p Size bytes. The plugin may lock/pin that buffer to achieve optimal
-// memory transfers involving that buffer.
-int32_t __tgt_rtl_data_notify_mapped(int32_t ID, void *HstPtr, int64_t Size);
-
-// Notify the plugin about an existing mapping being unmapped, starting at the
-// host address \p HstPtr and \p Size bytes.
-int32_t __tgt_rtl_data_notify_unmapped(int32_t ID, void *HstPtr);
-
-// Set the global device identifier offset, such that the plugin may determine a
-// unique device number.
-int32_t __tgt_rtl_set_device_offset(int32_t DeviceIdOffset);
-
-int32_t __tgt_rtl_launch_kernel(int32_t DeviceId, void *TgtEntryPtr,
-                                void **TgtArgs, ptrdiff_t *TgtOffsets,
-                                KernelArgsTy *KernelArgs,
-                                __tgt_async_info *AsyncInfoPtr);
-
-int32_t __tgt_rtl_initialize_record_replay(int32_t DeviceId, int64_t MemorySize,
-                                           void *VAddr, bool isRecord,
-                                           bool SaveOutput,
-                                           uint64_t &ReqPtrArgOffset);
-
-// Returns true if the device \p DeviceId suggests to use auto zero-copy.
-int32_t __tgt_rtl_use_auto_zero_copy(int32_t DeviceId);
-}
-
-#endif // OMPTARGET_SHARED_PLUGIN_API_H
diff --git a/openmp/libomptarget/include/Shared/PluginAPI.inc b/openmp/libomptarget/include/Shared/PluginAPI.inc
deleted file mode 100644
index e445da6852f7b4..00000000000000
--- a/openmp/libomptarget/include/Shared/PluginAPI.inc
+++ /dev/null
@@ -1,51 +0,0 @@
-//===-- Shared/PluginAPI.inc - Target independent plugin API ----*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the names of the interface functions between target
-// independent offload runtime library and target dependent plugins.
-//
-//===----------------------------------------------------------------------===//
-
-// No include guards!
-
-PLUGIN_API_HANDLE(init_plugin);
-PLUGIN_API_HANDLE(is_valid_binary);
-PLUGIN_API_HANDLE(is_data_exchangable);
-PLUGIN_API_HANDLE(number_of_devices);
-PLUGIN_API_HANDLE(init_device);
-PLUGIN_API_HANDLE(load_binary);
-PLUGIN_API_HANDLE(get_global);
-PLUGIN_API_HANDLE(get_function);
-PLUGIN_API_HANDLE(data_alloc);
-PLUGIN_API_HANDLE(data_submit);
-PLUGIN_API_HANDLE(data_submit_async);
-PLUGIN_API_HANDLE(data_retrieve);
-PLUGIN_API_HANDLE(data_retrieve_async);
-PLUGIN_API_HANDLE(data_exchange);
-PLUGIN_API_HANDLE(data_exchange_async);
-PLUGIN_API_HANDLE(data_delete);
-PLUGIN_API_HANDLE(launch_kernel);
-PLUGIN_API_HANDLE(init_requires);
-PLUGIN_API_HANDLE(synchronize);
-PLUGIN_API_HANDLE(query_async);
-PLUGIN_API_HANDLE(set_info_flag);
-PLUGIN_API_HANDLE(print_device_info);
-PLUGIN_API_HANDLE(create_event);
-PLUGIN_API_HANDLE(record_event);
-PLUGIN_API_HANDLE(wait_event);
-PLUGIN_API_HANDLE(sync_event);
-PLUGIN_API_HANDLE(destroy_event);
-PLUGIN_API_HANDLE(init_async_info);
-PLUGIN_API_HANDLE(init_device_info);
-PLUGIN_API_HANDLE(data_lock);
-PLUGIN_API_HANDLE(data_unlock);
-PLUGIN_API_HANDLE(data_notify_mapped);
-PLUGIN_API_HANDLE(data_notify_unmapped);
-PLUGIN_API_HANDLE(set_device_offset);
-PLUGIN_API_HANDLE(initialize_record_replay);
-PLUGIN_API_HANDLE(use_auto_zero_copy);
diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h
index bd2829722bb324..fd6e5fba5fc530 100644
--- a/openmp/libomptarget/include/device.h
+++ b/openmp/libomptarget/include/device.h
@@ -33,17 +33,19 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 
+#include "PluginInterface.h"
+using GenericPluginTy = llvm::omp::target::plugin::GenericPluginTy;
+
 // Forward declarations.
-struct PluginAdaptorTy;
 struct __tgt_bin_desc;
 struct __tgt_target_table;
 
 struct DeviceTy {
   int32_t DeviceID;
-  PluginAdaptorTy *RTL;
+  GenericPluginTy *RTL;
   int32_t RTLDeviceID;
 
-  DeviceTy(PluginAdaptorTy *RTL, int32_t DeviceID, int32_t RTLDeviceID);
+  DeviceTy(GenericPluginTy *RTL, int32_t DeviceID, int32_t RTLDeviceID);
   // DeviceTy is not copyable
   DeviceTy(const DeviceTy &D) = delete;
   DeviceTy &operator=(const DeviceTy &D) = delete;
diff --git a/openmp/libomptarget/plugins-nextgen/CMakeLists.txt b/openmp/libomptarget/plugins-nextgen/CMakeLists.txt
index df625e97c7ebf5..d1079f8a3e9cc2 100644
--- a/openmp/libomptarget/plugins-nextgen/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/CMakeLists.txt
@@ -14,7 +14,7 @@
 set(common_dir ${CMAKE_CURRENT_SOURCE_DIR}/common)
 add_subdirectory(common)
 function(add_target_library target_name lib_name)
-  add_llvm_library(${target_name} SHARED
+  add_llvm_library(${target_name} STATIC
     LINK_COMPONENTS
       ${LLVM_TARGETS_TO_BUILD}
       AggressiveInstCombine
@@ -46,27 +46,14 @@ function(add_target_library target_name lib_name)
   )
 
   llvm_update_compile_flags(${target_name})
+  target_include_directories(${target_name} PUBLIC ${common_dir}/include)
   target_link_libraries(${target_name} PRIVATE
                         PluginCommon ${OPENMP_PTHREAD_LIB})
 
   target_compile_definitions(${target_name} PRIVATE TARGET_NAME=${lib_name})
   target_compile_definitions(${target_name} PRIVATE 
                              DEBUG_PREFIX="TARGET ${lib_name} RTL")
-
-  if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
-    # On FreeBSD, the 'environ' symbol is undefined at link time, but resolved by
-    # the dynamic linker at runtime. Therefore, allow the symbol to be undefined
-    # when creating a shared library.
-    target_link_libraries(${target_name} PRIVATE "-Wl,--allow-shlib-undefined")
-  else()
-    target_link_libraries(${target_name} PRIVATE "-Wl,-z,defs")
-  endif()
-
-  if(LIBOMP_HAVE_VERSION_SCRIPT_FLAG)
-    target_link_libraries(${target_name} PRIVATE
-    "-Wl,--version-script=${common_dir}/../exports")
-  endif()
-  set_target_properties(${target_name} PROPERTIES CXX_VISIBILITY_PRESET protected)
+  set_target_properties(${target_name} PROPERTIES POSITION_INDEPENDENT_CODE ON)
 endfunction()
 
 foreach(plugin IN LISTS LIBOMPTARGET_PLUGINS_TO_BUILD)
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/CMakeLists.txt b/openmp/libomptarget/plugins-nextgen/amdgpu/CMakeLists.txt
index 40df77102c78fb..738183f8945ed7 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/CMakeLists.txt
@@ -57,8 +57,3 @@ else()
   libomptarget_say("Not generating AMDGPU tests, no supported devices detected."
                    " Use 'LIBOMPTARGET_FORCE_AMDGPU_TESTS' to override.")
 endif()
-
-# Install plugin under the lib destination folder.
-install(TARGETS omptarget.rtl.amdgpu LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
-set_target_properties(omptarget.rtl.amdgpu PROPERTIES
-  INSTALL_RPATH "$ORIGIN" BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/..")
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index 2dd08dd5d0b4ea..89a908f96e5d39 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -3043,10 +3043,6 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
     // HSA functions from now on, e.g., hsa_shut_down.
     Initialized = true;
 
-#ifdef OMPT_SUPPORT
-    ompt::connectLibrary();
-#endif
-
     // Register event handler to detect memory errors on the devices.
     Status = hsa_amd_register_system_event_handler(eventHandler, nullptr);
     if (auto Err = Plugin::check(
@@ -3134,6 +3130,8 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
 
   Triple::ArchType getTripleArch() const override { return Triple::amdgcn; }
 
+  const char *getName() const override { return GETNAME(TARGET_NAME); }
+
   /// Get the ELF code for recognizing the compatible image binary.
   uint16_t getMagicElfBits() const override { return ELF::EM_AMDGPU; }
 
@@ -3366,8 +3364,6 @@ Error AMDGPUKernelTy::printLaunchInfoDetails(GenericDeviceTy &GenericDevice,
   return Plugin::success();
 }
 
-GenericPluginTy *PluginTy::createPlugin() { return new AMDGPUPluginTy(); }
-
 template <typename... ArgsTy>
 static Error Plugin::check(int32_t Code, const char *ErrFmt, ArgsTy... Args) {
   hsa_status_t ResultCode = static_cast<hsa_status_t>(Code);
@@ -3455,3 +3451,9 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
 } // namespace target
 } // namespace omp
 } // namespace llvm
+
+extern "C" {
+llvm::omp::target::plugin::GenericPluginTy *createPlugin_amdgpu() {
+  return new llvm::omp::target::plugin::AMDGPUPluginTy();
+}
+}
diff --git a/openmp/libomptarget/plugins-nextgen/common/CMakeLists.txt b/openmp/libomptarget/plugins-nextgen/common/CMakeLists.txt
index acf0af63f0508c..a470dcee6d8598 100644
--- a/openmp/libomptarget/plugins-nextgen/common/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/common/CMakeLists.txt
@@ -66,6 +66,4 @@ target_include_directories(PluginCommon PUBLIC
   ${LIBOMPTARGET_INCLUDE_DIR}
 )
 
-set_target_properties(PluginCommon PROPERTIES
-  POSITION_INDEPENDENT_CODE ON
-  CXX_VISIBILITY_PRESET protected)
+set_target_properties(PluginCommon PROPERTIES POSITION_INDEPENDENT_CODE ON)
diff --git a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
index 79e8464bfda5c1..e7a008f3a85730 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
@@ -1010,6 +1010,9 @@ struct GenericPluginTy {
   /// Get the target triple of this plugin.
   virtual Triple::ArchType getTripleArch() const = 0;
 
+  /// Get the constant name identifier for this plugin.
+  virtual const char *getName() const = 0;
+
   /// Allocate a structure using the internal allocator.
   template <typename Ty> Ty *allocate() {
     return reinterpret_cast<Ty *>(Allocator.Allocate(sizeof(Ty), alignof(Ty)));
@@ -1226,7 +1229,7 @@ namespace Plugin {
 /// Create a success error. This is the same as calling Error::success(), but
 /// it is recommended to use this one for consistency with Plugin::error() and
 /// Plugin::check().
-static Error success() { return Error::success(); }
+static inline Error success() { return Error::success(); }
 
 /// Create a string error.
 template <typename... ArgsTy>
@@ -1246,95 +1249,6 @@ template <typename... ArgsTy>
 static Error check(int32_t ErrorCode, const char *ErrFmt, ArgsTy... Args);
 } // namespace Plugin
 
-/// Class for simplifying the getter operation of the plugin. Anywhere on the
-/// code, the current plugin can be retrieved by Plugin::get(). The class also
-/// declares functions to create plugin-specific object instances. The check(),
-/// createPlugin(), createDevice() and createGlobalHandler() functions should be
-/// defined by each plugin implementation.
-class PluginTy {
-  // Reference to the plugin instance.
-  static GenericPluginTy *SpecificPlugin;
-
-  PluginTy() {
-    if (auto Err = init())
-      REPORT("Failed to initialize plugin: %s\n",
-             toString(std::move(Err)).data());
-  }
-
-  ~PluginTy() {
-    if (auto Err = deinit())
-      REPORT("Failed to deinitialize plugin: %s\n",
-             toString(std::move(Err)).data());
-  }
-
-  PluginTy(const PluginTy &) = delete;
-  void operator=(const PluginTy &) = delete;
-
-  /// Create and intialize the plugin instance.
-  static Error init() {
-    assert(!SpecificPlugin && "Plugin already created");
-
-    // Create the specific plugin.
-    SpecificPlugin = createPlugin();
-    assert(SpecificPlugin && "Plugin was not created");
-
-    // Initialize the plugin.
-    return SpecificPlugin->init();
-  }
-
-  // Deinitialize and destroy the plugin instance.
-  static Error deinit() {
-    assert(SpecificPlugin && "Plugin no longer valid");
-
-    for (int32_t DevNo = 0, NumDev = SpecificPlugin->getNumDevices();
-         DevNo < NumDev; ++DevNo)
-      if (auto Err = SpecificPlugin->deinitDevice(DevNo))
-        return Err;
-
-    // Deinitialize the plugin.
-    if (auto Err = SpecificPlugin->deinit())
-      return Err;
-
-    // Delete the plugin instance.
-    delete SpecificPlugin;
-
-    // Invalidate the plugin reference.
-    SpecificPlugin = nullptr;
-
-    return Plugin::success();
-  }
-
-public:
-  /// Initialize the plugin if needed. The plugin could have been initialized by
-  /// a previous call to Plugin::get().
-  static Error initIfNeeded() {
-    // Trigger the initialization if needed.
-    get();
-
-    return Error::success();
-  }
-
-  /// Get a reference (or create if it was not created) to the plugin instance.
-  static GenericPluginTy &get() {
-    // This static variable will initialize the underlying plugin instance in
-    // case there was no previous explicit initialization. The initialization is
-    // thread safe.
-    static PluginTy Plugin;
-
-    assert(SpecificPlugin && "Plugin is not active");
-    return *SpecificPlugin;
-  }
-
-  /// Get a reference to the plugin with a specific plugin-specific type.
-  template <typename Ty> static Ty &get() { return static_cast<Ty &>(get()); }
-
-  /// Indicate whether the plugin is active.
-  static bool isActive() { return SpecificPlugin != nullptr; }
-
-  /// Create a plugin instance.
-  static GenericPluginTy *createPlugin();
-};
-
 /// Auxiliary interface class for GenericDeviceResourceManagerTy. This class
 /// acts as a reference to a device resource, such as a stream, and requires
 /// some basic functions to be implemented. The derived class should define an
diff --git a/openmp/libomptarget/plugins-nextgen/common/include/Utils/ELF.h b/openmp/libomptarget/plugins-nextgen/common/include/Utils/ELF.h
index 88c83d39b68ceb..d70fbbd0ba40e1 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/Utils/ELF.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/Utils/ELF.h
@@ -13,8 +13,6 @@
 #ifndef LLVM_OPENMP_LIBOMPTARGET_PLUGINS_ELF_UTILS_H
 #define LLVM_OPENMP_LIBOMPTARGET_PLUGINS_ELF_UTILS_H
 
-#include "Shared/PluginAPI.h"
-
 #include "llvm/Object/ELF.h"
 #include "llvm/Object/ELFObjectFile.h"
 
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/JIT.cpp b/openmp/libomptarget/plugins-nextgen/common/src/JIT.cpp
index 9eb610cab4de66..9d58e6060646ba 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/JIT.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/JIT.cpp
@@ -56,28 +56,6 @@ bool isImageBitcode(const __tgt_device_image &Image) {
   return identify_magic(Binary) == file_magic::bitcode;
 }
 
-std::once_flag InitFlag;
-
-void init(Triple TT) {
-  codegen::RegisterCodeGenFlags();
-#ifdef LIBOMPTARGET_JIT_NVPTX
-  if (TT.isNVPTX()) {
-    LLVMInitializeNVPTXTargetInfo();
-    LLVMInitializeNVPTXTarget();
-    LLVMInitializeNVPTXTargetMC();
-    LLVMInitializeNVPTXAsmPrinter();
-  }
-#endif
-#ifdef LIBOMPTARGET_JIT_AMDGPU
-  if (TT.isAMDGPU()) {
-    LLVMInitializeAMDGPUTargetInfo();
-    LLVMInitializeAMDGPUTarget();
-    LLVMInitializeAMDGPUTargetMC();
-    LLVMInitializeAMDGPUAsmPrinter();
-  }
-#endif
-}
-
 Expected<std::unique_ptr<Module>>
 createModuleFromMemoryBuffer(std::unique_ptr<MemoryBuffer> &MB,
                              LLVMContext &Context) {
@@ -148,7 +126,23 @@ createTargetMachine(Module &M, std::string CPU, unsigned OptLevel) {
 } // namespace
 
 JITEngine::JITEngine(Triple::ArchType TA) : TT(Triple::getArchTypeName(TA)) {
-  std::call_once(InitFlag, init, TT);
+  codegen::RegisterCodeGenFlags();
+#ifdef LIBOMPTARGET_JIT_NVPTX
+  if (TT.isNVPTX()) {
+    LLVMInitializeNVPTXTargetInfo();
+    LLVMInitializeNVPTXTarget();
+    LLVMInitializeNVPTXTargetMC();
+    LLVMInitializeNVPTXAsmPrinter();
+  }
+#endif
+#ifdef LIBOMPTARGET_JIT_AMDGPU
+  if (TT.isAMDGPU()) {
+    LLVMInitializeAMDGPUTargetInfo();
+    LLVMInitializeAMDGPUTarget();
+    LLVMInitializeAMDGPUTargetMC();
+    LLVMInitializeAMDGPUAsmPrinter();
+  }
+#endif
 }
 
 void JITEngine::opt(TargetMachine *TM, TargetLibraryInfoImpl *TLII, Module &M,
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index a4e6c93192159a..4037a85aa9ef24 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
@@ -13,7 +13,6 @@
 #include "Shared/APITypes.h"
 #include "Shared/Debug.h"
 #include "Shared/Environment.h"
-#include "Shared/PluginAPI.h"
 
 #include "GlobalHandler.h"
 #include "JIT.h"
@@ -39,8 +38,6 @@ using namespace omp;
 using namespace target;
 using namespace plugin;
 
-GenericPluginTy *PluginTy::SpecificPlugin = nullptr;
-
 // TODO: Fix any thread safety issues for multi-threaded kernel recording.
 struct RecordReplayTy {
 
@@ -2021,205 +2018,3 @@ bool llvm::omp::target::plugin::libomptargetSupportsRPC() {
   return false;
 #endif
 }
-
-/// Exposed library API function, basically wrappers around the GenericDeviceTy
-/// functionality with the same name. All non-async functions are redirected
-/// to the async versions right away with a NULL AsyncInfoPtr.
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-int32_t __tgt_rtl_init_plugin() {
-  auto Err = PluginTy::initIfNeeded();
-  if (Err) {
-    [[maybe_unused]] std::string ErrStr = toString(std::move(Err));
-    DP("Failed to init plugin: %s", ErrStr.c_str());
-    return OFFLOAD_FAIL;
-  }
-
-  return OFFLOAD_SUCCESS;
-}
-
-int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
-  if (!PluginTy::isActive())
-    return false;
-
-  return PluginTy::get().is_valid_binary(Image);
-}
-
-int32_t __tgt_rtl_init_device(int32_t DeviceId) {
-  return PluginTy::get().init_device(DeviceId);
-}
-
-int32_t __tgt_rtl_number_of_devices() {
-  return PluginTy::get().number_of_devices();
-}
-
-int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) {
-  return PluginTy::get().init_requires(RequiresFlags);
-}
-
-int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDeviceId,
-                                      int32_t DstDeviceId) {
-  return PluginTy::get().is_data_exchangable(SrcDeviceId, DstDeviceId);
-}
-
-int32_t __tgt_rtl_initialize_record_replay(int32_t DeviceId, int64_t MemorySize,
-                                           void *VAddr, bool isRecord,
-                                           bool SaveOutput,
-                                           uint64_t &ReqPtrArgOffset) {
-  return PluginTy::get().initialize_record_replay(
-      DeviceId, MemorySize, VAddr, isRecord, SaveOutput, ReqPtrArgOffset);
-}
-
-int32_t __tgt_rtl_load_binary(int32_t DeviceId, __tgt_device_image *TgtImage,
-                              __tgt_device_binary *Binary) {
-  return PluginTy::get().load_binary(DeviceId, TgtImage, Binary);
-}
-
-void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HostPtr,
-                           int32_t Kind) {
-  return PluginTy::get().data_alloc(DeviceId, Size, HostPtr, Kind);
-}
-
-int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr, int32_t Kind) {
-  return PluginTy::get().data_delete(DeviceId, TgtPtr, Kind);
-}
-
-int32_t __tgt_rtl_data_lock(int32_t DeviceId, void *Ptr, int64_t Size,
-                            void **LockedPtr) {
-  return PluginTy::get().data_lock(DeviceId, Ptr, Size, LockedPtr);
-}
-
-int32_t __tgt_rtl_data_unlock(int32_t DeviceId, void *Ptr) {
-  return PluginTy::get().data_unlock(DeviceId, Ptr);
-}
-
-int32_t __tgt_rtl_data_notify_mapped(int32_t DeviceId, void *HstPtr,
-                                     int64_t Size) {
-  return PluginTy::get().data_notify_mapped(DeviceId, HstPtr, Size);
-}
-
-int32_t __tgt_rtl_data_notify_unmapped(int32_t DeviceId, void *HstPtr) {
-  return PluginTy::get().data_notify_unmapped(DeviceId, HstPtr);
-}
-
-int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr,
-                              int64_t Size) {
-  return PluginTy::get().data_submit(DeviceId, TgtPtr, HstPtr, Size);
-}
-
-int32_t __tgt_rtl_data_submit_async(int32_t DeviceId, void *TgtPtr,
-                                    void *HstPtr, int64_t Size,
-                                    __tgt_async_info *AsyncInfoPtr) {
-  return PluginTy::get().data_submit_async(DeviceId, TgtPtr, HstPtr, Size,
-                                           AsyncInfoPtr);
-}
-
-int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
-                                int64_t Size) {
-  return PluginTy::get().data_retrieve(DeviceId, HstPtr, TgtPtr, Size);
-}
-
-int32_t __tgt_rtl_data_retrieve_async(int32_t DeviceId, void *HstPtr,
-                                      void *TgtPtr, int64_t Size,
-                                      __tgt_async_info *AsyncInfoPtr) {
-  return PluginTy::get().data_retrieve_async(DeviceId, HstPtr, TgtPtr, Size,
-                                             AsyncInfoPtr);
-}
-
-int32_t __tgt_rtl_data_exchange(int32_t SrcDeviceId, void *SrcPtr,
-                                int32_t DstDeviceId, void *DstPtr,
-                                int64_t Size) {
-  return PluginTy::get().data_exchange(SrcDeviceId, SrcPtr, DstDeviceId, DstPtr,
-                                       Size);
-}
-
-int32_t __tgt_rtl_data_exchange_async(int32_t SrcDeviceId, void *SrcPtr,
-                                      int DstDeviceId, void *DstPtr,
-                                      int64_t Size,
-                                      __tgt_async_info *AsyncInfo) {
-  return PluginTy::get().data_exchange_async(SrcDeviceId, SrcPtr, DstDeviceId,
-                                             DstPtr, Size, AsyncInfo);
-}
-
-int32_t __tgt_rtl_launch_kernel(int32_t DeviceId, void *TgtEntryPtr,
-                                void **TgtArgs, ptrdiff_t *TgtOffsets,
-                                KernelArgsTy *KernelArgs,
-                                __tgt_async_info *AsyncInfoPtr) {
-  return PluginTy::get().launch_kernel(DeviceId, TgtEntryPtr, TgtArgs,
-                                       TgtOffsets, KernelArgs, AsyncInfoPtr);
-}
-
-int32_t __tgt_rtl_synchronize(int32_t DeviceId,
-                              __tgt_async_info *AsyncInfoPtr) {
-  return PluginTy::get().synchronize(DeviceId, AsyncInfoPtr);
-}
-
-int32_t __tgt_rtl_query_async(int32_t DeviceId,
-                              __tgt_async_info *AsyncInfoPtr) {
-  return PluginTy::get().query_async(DeviceId, AsyncInfoPtr);
-}
-
-void __tgt_rtl_print_device_info(int32_t DeviceId) {
-  PluginTy::get().print_device_info(DeviceId);
-}
-
-int32_t __tgt_rtl_create_event(int32_t DeviceId, void **EventPtr) {
-  return PluginTy::get().create_event(DeviceId, EventPtr);
-}
-
-int32_t __tgt_rtl_record_event(int32_t DeviceId, void *EventPtr,
-                               __tgt_async_info *AsyncInfoPtr) {
-  return PluginTy::get().record_event(DeviceId, EventPtr, AsyncInfoPtr);
-}
-
-int32_t __tgt_rtl_wait_event(int32_t DeviceId, void *EventPtr,
-                             __tgt_async_info *AsyncInfoPtr) {
-  return PluginTy::get().wait_event(DeviceId, EventPtr, AsyncInfoPtr);
-}
-
-int32_t __tgt_rtl_sync_event(int32_t DeviceId, void *EventPtr) {
-  return PluginTy::get().sync_event(DeviceId, EventPtr);
-}
-
-int32_t __tgt_rtl_destroy_event(int32_t DeviceId, void *EventPtr) {
-  return PluginTy::get().destroy_event(DeviceId, EventPtr);
-}
-
-void __tgt_rtl_set_info_flag(uint32_t NewInfoLevel) {
-  return PluginTy::get().set_info_flag(NewInfoLevel);
-}
-
-int32_t __tgt_rtl_init_async_info(int32_t DeviceId,
-                                  __tgt_async_info **AsyncInfoPtr) {
-  return PluginTy::get().init_async_info(DeviceId, AsyncInfoPtr);
-}
-
-int32_t __tgt_rtl_init_device_info(int32_t DeviceId,
-                                   __tgt_device_info *DeviceInfo,
-                                   const char **ErrStr) {
-  return PluginTy::get().init_device_info(DeviceId, DeviceInfo, ErrStr);
-}
-
-int32_t __tgt_rtl_set_device_offset(int32_t DeviceIdOffset) {
-  return PluginTy::get().set_device_offset(DeviceIdOffset);
-}
-
-int32_t __tgt_rtl_use_auto_zero_copy(int32_t DeviceId) {
-  return PluginTy::get().use_auto_zero_copy(DeviceId);
-}
-
-int32_t __tgt_rtl_get_global(__tgt_device_binary Binary, uint64_t Size,
-                             const char *Name, void **DevicePtr) {
-  return PluginTy::get().get_global(Binary, Size, Name, DevicePtr);
-}
-
-int32_t __tgt_rtl_get_function(__tgt_device_binary Binary, const char *Name,
-                               void **KernelPtr) {
-  return PluginTy::get().get_function(Binary, Name, KernelPtr);
-}
-
-#ifdef __cplusplus
-}
-#endif
diff --git a/openmp/libomptarget/plugins-nextgen/cuda/CMakeLists.txt b/openmp/libomptarget/plugins-nextgen/cuda/CMakeLists.txt
index b3530462aa19ba..dd684bb223431d 100644
--- a/openmp/libomptarget/plugins-nextgen/cuda/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/cuda/CMakeLists.txt
@@ -51,8 +51,3 @@ else()
   libomptarget_say("Not generating NVIDIA tests, no supported devices detected."
                    " Use 'LIBOMPTARGET_FORCE_NVIDIA_TESTS' to override.")
 endif()
-
-# Install plugin under the lib destination folder.
-install(TARGETS omptarget.rtl.cuda LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
-set_target_properties(omptarget.rtl.cuda PROPERTIES
-  INSTALL_RPATH "$ORIGIN" BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/..")
diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
index fc74c6aa23fddd..b260334baa18b0 100644
--- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -1342,10 +1342,6 @@ struct CUDAPluginTy final : public GenericPluginTy {
       return 0;
     }
 
-#ifdef OMPT_SUPPORT
-    ompt::connectLibrary();
-#endif
-
     if (Res == CUDA_ERROR_NO_DEVICE) {
       // Do not initialize if there are no devices.
       DP("There are no devices supporting CUDA.\n");
@@ -1390,6 +1386,8 @@ struct CUDAPluginTy final : public GenericPluginTy {
     return Triple::nvptx64;
   }
 
+  const char *getName() const override { return GETNAME(TARGET_NAME); }
+
   /// Check whether the image is compatible with the available CUDA devices.
   Expected<bool> isELFCompatible(StringRef Image) const override {
     auto ElfOrErr =
@@ -1495,8 +1493,6 @@ Error CUDADeviceTy::dataExchangeImpl(const void *SrcPtr,
   return Plugin::check(Res, "Error in cuMemcpyDtoDAsync: %s");
 }
 
-GenericPluginTy *PluginTy::createPlugin() { return new CUDAPluginTy(); }
-
 template <typename... ArgsTy>
 static Error Plugin::check(int32_t Code, const char *ErrFmt, ArgsTy... Args) {
   CUresult ResultCode = static_cast<CUresult>(Code);
@@ -1516,3 +1512,9 @@ static Error Plugin::check(int32_t Code, const char *ErrFmt, ArgsTy... Args) {
 } // namespace target
 } // namespace omp
 } // namespace llvm
+
+extern "C" {
+llvm::omp::target::plugin::GenericPluginTy *createPlugin_cuda() {
+  return new llvm::omp::target::plugin::CUDAPluginTy();
+}
+}
diff --git a/openmp/libomptarget/plugins-nextgen/host/CMakeLists.txt b/openmp/libomptarget/plugins-nextgen/host/CMakeLists.txt
index 0954f8367654f6..fb855f4646dea8 100644
--- a/openmp/libomptarget/plugins-nextgen/host/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/host/CMakeLists.txt
@@ -31,14 +31,6 @@ else()
   target_include_directories(omptarget.rtl.host PRIVATE dynamic_ffi)
 endif()
 
-# Install plugin under the lib destination folder.
-install(TARGETS omptarget.rtl.host
-        LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
-set_target_properties(omptarget.rtl.host PROPERTIES
-  INSTALL_RPATH "$ORIGIN" BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.."
-  POSITION_INDEPENDENT_CODE ON
-  CXX_VISIBILITY_PRESET protected)
-
 target_include_directories(omptarget.rtl.host PRIVATE
                            ${LIBOMPTARGET_INCLUDE_DIR})
 
diff --git a/openmp/libomptarget/plugins-nextgen/host/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/host/src/rtl.cpp
index f0ce24249301af..8970fd54bf4886 100644
--- a/openmp/libomptarget/plugins-nextgen/host/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/host/src/rtl.cpp
@@ -383,10 +383,6 @@ struct GenELF64PluginTy final : public GenericPluginTy {
 
   /// Initialize the plugin and return the number of devices.
   Expected<int32_t> initImpl() override {
-#ifdef OMPT_SUPPORT
-    ompt::connectLibrary();
-#endif
-
 #ifdef USES_DYNAMIC_FFI
     if (auto Err = Plugin::check(ffi_init(), "Failed to initialize libffi"))
       return std::move(Err);
@@ -423,9 +419,9 @@ struct GenELF64PluginTy final : public GenericPluginTy {
   Triple::ArchType getTripleArch() const override {
     return llvm::Triple(LIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE).getArch();
   }
-};
 
-GenericPluginTy *PluginTy::createPlugin() { return new GenELF64PluginTy(); }
+  const char *getName() const override { return GETNAME(TARGET_NAME); }
+};
 
 template <typename... ArgsTy>
 static Error Plugin::check(int32_t Code, const char *ErrMsg, ArgsTy... Args) {
@@ -440,3 +436,9 @@ static Error Plugin::check(int32_t Code, const char *ErrMsg, ArgsTy... Args) {
 } // namespace target
 } // namespace omp
 } // namespace llvm
+
+extern "C" { // C linkage: PluginManager.cpp declares createPlugin_<Name>().
+llvm::omp::target::plugin::GenericPluginTy *createPlugin_host() {
+  return new llvm::omp::target::plugin::GenELF64PluginTy(); // Caller owns it.
+}
+} // extern "C"
diff --git a/openmp/libomptarget/src/CMakeLists.txt b/openmp/libomptarget/src/CMakeLists.txt
index b833c5199fab04..62ca4a11a9e6d4 100644
--- a/openmp/libomptarget/src/CMakeLists.txt
+++ b/openmp/libomptarget/src/CMakeLists.txt
@@ -59,6 +59,10 @@ target_compile_definitions(omptarget PRIVATE
   DEBUG_PREFIX="omptarget"
 )
 
+foreach(plugin IN LISTS LIBOMPTARGET_PLUGINS_TO_BUILD) # Link each listed plugin.
+  target_link_libraries(omptarget PRIVATE omptarget.rtl.${plugin})
+endforeach()
+
 target_compile_options(omptarget PUBLIC ${offload_compile_flags})
 target_link_options(omptarget PUBLIC ${offload_link_flags})
 
diff --git a/openmp/libomptarget/src/OffloadRTL.cpp b/openmp/libomptarget/src/OffloadRTL.cpp
index dd75b1b181505e..29b573a27d087d 100644
--- a/openmp/libomptarget/src/OffloadRTL.cpp
+++ b/openmp/libomptarget/src/OffloadRTL.cpp
@@ -50,6 +50,7 @@ void deinitRuntime() {
 
   if (RefCount == 1) {
     DP("Deinit offload library!\n");
+    PM->deinit();
     delete PM;
     PM = nullptr;
   }
diff --git a/openmp/libomptarget/src/OpenMP/InteropAPI.cpp b/openmp/libomptarget/src/OpenMP/InteropAPI.cpp
index 1a995cde7816e1..bdbc440c64a2c9 100644
--- a/openmp/libomptarget/src/OpenMP/InteropAPI.cpp
+++ b/openmp/libomptarget/src/OpenMP/InteropAPI.cpp
@@ -230,14 +230,14 @@ void __tgt_interop_init(ident_t *LocRef, int32_t Gtid,
   }
 
   DeviceTy &Device = *DeviceOrErr;
-  if (!Device.RTL || !Device.RTL->init_device_info ||
+  if (!Device.RTL ||
       Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info,
                                    &(InteropPtr)->err_str)) {
     delete InteropPtr;
     InteropPtr = omp_interop_none;
   }
   if (InteropType == kmp_interop_type_tasksync) {
-    if (!Device.RTL || !Device.RTL->init_async_info ||
+    if (!Device.RTL ||
         Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) {
       delete InteropPtr;
       InteropPtr = omp_interop_none;
diff --git a/openmp/libomptarget/src/PluginManager.cpp b/openmp/libomptarget/src/PluginManager.cpp
index dbb556c179e58c..191afa345641a1 100644
--- a/openmp/libomptarget/src/PluginManager.cpp
+++ b/openmp/libomptarget/src/PluginManager.cpp
@@ -23,85 +23,25 @@ using namespace llvm::sys;
 
 PluginManager *PM = nullptr;
 
-Expected<std::unique_ptr<PluginAdaptorTy>>
-PluginAdaptorTy::create(const std::string &Name) {
-  DP("Attempting to load library '%s'...\n", Name.c_str());
-  TIMESCOPE_WITH_NAME_AND_IDENT(Name, (const ident_t *)nullptr);
-
-  std::string ErrMsg;
-  auto LibraryHandler = std::make_unique<DynamicLibrary>(
-      DynamicLibrary::getPermanentLibrary(Name.c_str(), &ErrMsg));
-
-  if (!LibraryHandler->isValid()) {
-    // Library does not exist or cannot be found.
-    return createStringError(inconvertibleErrorCode(),
-                             "Unable to load library '%s': %s!\n", Name.c_str(),
-                             ErrMsg.c_str());
-  }
-
-  DP("Successfully loaded library '%s'!\n", Name.c_str());
-  auto PluginAdaptor = std::unique_ptr<PluginAdaptorTy>(
-      new PluginAdaptorTy(Name, std::move(LibraryHandler)));
-  if (auto Err = PluginAdaptor->init())
-    return Err;
-  return std::move(PluginAdaptor);
-}
-
-PluginAdaptorTy::PluginAdaptorTy(const std::string &Name,
-                                 std::unique_ptr<llvm::sys::DynamicLibrary> DL)
-    : Name(Name), LibraryHandler(std::move(DL)) {}
-
-Error PluginAdaptorTy::init() {
-
-#define PLUGIN_API_HANDLE(NAME)                                                \
-  NAME = reinterpret_cast<decltype(NAME)>(                                     \
-      LibraryHandler->getAddressOfSymbol(GETNAME(__tgt_rtl_##NAME)));          \
-  if (!NAME) {                                                                 \
-    return createStringError(inconvertibleErrorCode(),                         \
-                             "Invalid plugin as necessary interface function " \
-                             "(%s) was not found.\n",                          \
-                             std::string(#NAME).c_str());                      \
-  }
-
-#include "Shared/PluginAPI.inc"
-#undef PLUGIN_API_HANDLE
-
-  // Remove plugin on failure to call optional init_plugin
-  int32_t Rc = init_plugin();
-  if (Rc != OFFLOAD_SUCCESS) {
-    return createStringError(inconvertibleErrorCode(),
-                             "Unable to initialize library '%s': %u!\n",
-                             Name.c_str(), Rc);
-  }
-
-  // No devices are supported by this RTL?
-  int32_t NumberOfPluginDevices = number_of_devices();
-  if (!NumberOfPluginDevices) {
-    return createStringError(inconvertibleErrorCode(),
-                             "No devices supported in this RTL\n");
-  }
-
-  DP("Registered '%s' with %d plugin visible devices!\n", Name.c_str(),
-     NumberOfPluginDevices);
-  return Error::success();
-}
+// Every plugin exports this function to create an instance of the plugin type.
+#define PLUGIN_TARGET(Name) extern "C" GenericPluginTy *createPlugin_##Name();
+#include "Shared/Targets.def"
 
 void PluginManager::init() {
   TIMESCOPE();
   DP("Loading RTLs...\n");
 
-  // Attempt to open all the plugins and, if they exist, check if the interface
-  // is correct and if they are supporting any devices.
+  // Attempt to create an instance of each supported plugin.
 #define PLUGIN_TARGET(Name)                                                    \
   do {                                                                         \
-    auto PluginAdaptorOrErr =                                                  \
-        PluginAdaptorTy::create("libomptarget.rtl." #Name ".so");              \
-    if (!PluginAdaptorOrErr) {                                                 \
-      [[maybe_unused]] std::string InfoMsg =                                   \
-          toString(PluginAdaptorOrErr.takeError());                            \
-      DP("%s", InfoMsg.c_str());                                               \
+    auto Plugin = std::unique_ptr<GenericPluginTy>(createPlugin_##Name());     \
+    if (auto Err = Plugin->init()) {                                           \
+      [[maybe_unused]] std::string InfoMsg = toString(std::move(Err));         \
+      DP("Failed to init plugin: %s\n", InfoMsg.c_str());                      \
     } else {                                                                   \
-      PluginAdaptors.push_back(std::move(*PluginAdaptorOrErr));                \
+      DP("Registered plugin %s with %d visible device(s)\n",                   \
+         Plugin->getName(), Plugin->number_of_devices());                      \
+      Plugins.emplace_back(std::move(Plugin));                                 \
     }                                                                          \
   } while (false);
 #include "Shared/Targets.def"
@@ -109,15 +49,29 @@ void PluginManager::init() {
   DP("RTLs loaded!\n");
 }
 
-void PluginManager::initDevices(PluginAdaptorTy &RTL) {
+void PluginManager::deinit() {
+  TIMESCOPE();
+  DP("Unloading RTLs...\n");
+  // Deinitialize every plugin; failures are only logged, never propagated.
+  for (auto &Plugin : Plugins) {
+    if (auto Err = Plugin->deinit()) {
+      [[maybe_unused]] std::string InfoMsg = toString(std::move(Err));
+      DP("Failed to deinit plugin: %s\n", InfoMsg.c_str());
+    }
+    Plugin.reset(); // Destroy the plugin; release() would leak the object.
+  }
+
+  DP("RTLs unloaded!\n");
+}
+
+void PluginManager::initDevices(GenericPluginTy &RTL) {
   // If this RTL has already been initialized.
   if (PM->DeviceOffsets.contains(&RTL))
     return;
   TIMESCOPE();
 
   // If this RTL is not already in use, initialize it.
-  assert(RTL.number_of_devices() > 0 &&
-         "Tried to initialize useless plugin adaptor");
+  assert(RTL.number_of_devices() > 0 && "Tried to initialize useless plugin!");
 
   // Initialize the device information for the RTL we are about to use.
   auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor();
@@ -157,13 +111,12 @@ void PluginManager::initDevices(PluginAdaptorTy &RTL) {
 
   DeviceOffsets[&RTL] = DeviceOffset;
   DeviceUsed[&RTL] = NumberOfUserDevices;
-  DP("Plugin adaptor " DPxMOD " has index %d, exposes %d out of %d devices!\n",
-     DPxPTR(RTL.LibraryHandler.get()), DeviceOffset, NumberOfUserDevices,
-     RTL.number_of_devices());
+  DP("Plugin has index %d, exposes %d out of %d devices!\n", DeviceOffset,
+     NumberOfUserDevices, RTL.number_of_devices());
 }
 
 void PluginManager::initAllPlugins() {
-  for (auto &R : PluginAdaptors)
+  for (auto &R : Plugins)
     initDevices(*R);
 }
 
@@ -216,19 +169,22 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) {
     // Obtain the image and information that was previously extracted.
     __tgt_device_image *Img = &DI.getExecutableImage();
 
-    PluginAdaptorTy *FoundRTL = nullptr;
+    GenericPluginTy *FoundRTL = nullptr;
 
     // Scan the RTLs that have associated images until we find one that supports
     // the current image.
-    for (auto &R : PM->pluginAdaptors()) {
+    for (auto &R : PM->plugins()) {
+      if (!R.number_of_devices())
+        continue;
+
       if (!R.is_valid_binary(Img)) {
         DP("Image " DPxMOD " is NOT compatible with RTL %s!\n",
-           DPxPTR(Img->ImageStart), R.Name.c_str());
+           DPxPTR(Img->ImageStart), R.getName());
         continue;
       }
 
       DP("Image " DPxMOD " is compatible with RTL %s!\n",
-         DPxPTR(Img->ImageStart), R.Name.c_str());
+         DPxPTR(Img->ImageStart), R.getName());
 
       PM->initDevices(R);
 
@@ -247,7 +203,7 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) {
           (PM->HostEntriesBeginToTransTable)[Desc->HostEntriesBegin];
 
       DP("Registering image " DPxMOD " with RTL %s!\n", DPxPTR(Img->ImageStart),
-         R.Name.c_str());
+         R.getName());
 
       registerImageIntoTranslationTable(TransTable, PM->DeviceOffsets[&R],
                                         PM->DeviceUsed[&R], Img);
@@ -282,11 +238,11 @@ void PluginManager::unregisterLib(__tgt_bin_desc *Desc) {
     // Obtain the image and information that was previously extracted.
     __tgt_device_image *Img = &DI.getExecutableImage();
 
-    PluginAdaptorTy *FoundRTL = NULL;
+    GenericPluginTy *FoundRTL = NULL;
 
     // Scan the RTLs that have associated images until we find one that supports
     // the current image. We only need to scan RTLs that are already being used.
-    for (auto &R : PM->pluginAdaptors()) {
+    for (auto &R : PM->plugins()) {
       if (!DeviceOffsets.contains(&R))
         continue;
 
@@ -296,8 +252,7 @@ void PluginManager::unregisterLib(__tgt_bin_desc *Desc) {
 
       FoundRTL = &R;
 
-      DP("Unregistered image " DPxMOD " from RTL " DPxMOD "!\n",
-         DPxPTR(Img->ImageStart), DPxPTR(R.LibraryHandler.get()));
+      DP("Unregistered image " DPxMOD " from RTL\n", DPxPTR(Img->ImageStart));
 
       break;
     }
diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
index 44a2facc8d3ddd..749b4c567f8e46 100644
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -64,7 +64,7 @@ int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device,
   return OFFLOAD_SUCCESS;
 }
 
-DeviceTy::DeviceTy(PluginAdaptorTy *RTL, int32_t DeviceID, int32_t RTLDeviceID)
+DeviceTy::DeviceTy(GenericPluginTy *RTL, int32_t DeviceID, int32_t RTLDeviceID)
     : DeviceID(DeviceID), RTL(RTL), RTLDeviceID(RTLDeviceID),
       MappingInfo(*this) {}
 
@@ -192,7 +192,6 @@ int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
           RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, Size,
           /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
   if (!AsyncInfo) {
-    assert(RTL->data_exchange && "RTL->data_exchange is nullptr");
     return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr,
                               Size);
   }
diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp
index b562ba8818c39b..d9df69d3559ff1 100644
--- a/openmp/libomptarget/src/interface.cpp
+++ b/openmp/libomptarget/src/interface.cpp
@@ -456,7 +456,7 @@ EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) {
   assert(PM && "Runtime not initialized");
   std::atomic<uint32_t> &InfoLevel = getInfoLevelInternal();
   InfoLevel.store(NewInfoLevel);
-  for (auto &R : PM->pluginAdaptors())
+  for (auto &R : PM->plugins())
     R.set_info_flag(NewInfoLevel);
 }
 
diff --git a/openmp/libomptarget/tools/kernelreplay/llvm-omp-kernel-replay.cpp b/openmp/libomptarget/tools/kernelreplay/llvm-omp-kernel-replay.cpp
index 761e04e4c7bbdb..1e9a6a84d80583 100644
--- a/openmp/libomptarget/tools/kernelreplay/llvm-omp-kernel-replay.cpp
+++ b/openmp/libomptarget/tools/kernelreplay/llvm-omp-kernel-replay.cpp
@@ -13,8 +13,6 @@
 
 #include "omptarget.h"
 
-#include "Shared/PluginAPI.h"
-
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/JSON.h"
 #include "llvm/Support/MemoryBuffer.h"
diff --git a/openmp/libomptarget/unittests/Plugins/NextgenPluginsTest.cpp b/openmp/libomptarget/unittests/Plugins/NextgenPluginsTest.cpp
index 635bd1637c9032..479b3f614aed2b 100644
--- a/openmp/libomptarget/unittests/Plugins/NextgenPluginsTest.cpp
+++ b/openmp/libomptarget/unittests/Plugins/NextgenPluginsTest.cpp
@@ -6,7 +6,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Shared/PluginAPI.h"
 #include "omptarget.h"
 #include "gtest/gtest.h"
 



More information about the Openmp-commits mailing list