[llvm] [Offload] Add MPI Proxy Plugin (PR #114574)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 1 10:01:20 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-offload
Author: Jhonatan Cléto (cl3to)
<details>
<summary>Changes</summary>
This patch introduces a new Offload plugin built on the existing interface to enable the offloading of computational tasks to remote accelerator devices via an MPI Proxy Layer. It improves the efficiency of kernel launches and data transfers by utilizing an event-driven architecture with non-blocking MPI communications and C++20 coroutines, facilitating asynchronous operations.
With this new MPI Plugin, users can offload OpenMP target regions to remote devices seamlessly, as if they were local. Any remote device compatible with an Offload Plugin can be used with the MPI Plugin. Currently, we have tested this plugin with X86_64 and CUDA devices, but it is expected to work with AMD GPUs as well.
Currently, the plugin lacks support for the following features:
- Unified/shared memory allocation/free operations
- Device operations that depend on host function calls outside target regions, such as:
  - RPC calls for user-defined functions
  - OMPT callbacks
Programs using the MPI Plugin are compiled like standard OpenMP target programs with clang, as shown in this example:
```sh
clang -fopenmp -fopenmp-targets=nvptx64 -o app app.c
```
The MPI Plugin uses a binary, `llvm-offload-mpi-proxy-device`, to execute target operations on the remote device. Thus, to offload tasks to an MPI device, the program must be executed with the Multiple Program Multiple Data (MPMD) model of an MPI launcher, as shown here:
```sh
mpirun -np N llvm-offload-mpi-proxy-device : -np 1 ./app
```
**Note**: Only one instance of the OpenMP program (`-np 1 ./app`) should be created. If multiple instances are launched, the plugin will not function correctly. Additionally, due to a design constraint, the host process (`app`) must have the rank `WorldSize - 1` for MPI communication to work correctly. Consequently, it's essential to execute the `mpirun` command in the order shown in the previous example.
At runtime, the number of devices returned by the `omp_get_num_devices()` call will be the sum of local devices and all devices available in each `llvm-offload-mpi-proxy-device` instance.
To compile the plugin and run the test suite, an environment with an installed MPI implementation (such as OpenMPI or MPICH) is required.
We currently lack resources to add a dedicated Buildbot for this plugin, so we request that existing Buildbots be updated to support it.
---
Patch is 165.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114574.diff
33 Files Affected:
- (modified) offload/CMakeLists.txt (+5-1)
- (modified) offload/plugins-nextgen/common/include/PluginInterface.h (+63-52)
- (modified) offload/plugins-nextgen/host/src/rtl.cpp (+1-1)
- (added) offload/plugins-nextgen/mpi/CMakeLists.txt (+134)
- (added) offload/plugins-nextgen/mpi/event_system/CMakeLists.txt (+29)
- (added) offload/plugins-nextgen/mpi/event_system/EventSystem.cpp (+848)
- (added) offload/plugins-nextgen/mpi/event_system/EventSystem.h (+556)
- (added) offload/plugins-nextgen/mpi/src/ProxyDevice.cpp (+1071)
- (added) offload/plugins-nextgen/mpi/src/RemotePluginManager.cpp (+104)
- (added) offload/plugins-nextgen/mpi/src/RemotePluginManager.h (+123)
- (added) offload/plugins-nextgen/mpi/src/RemoteTargets.def.in (+20)
- (added) offload/plugins-nextgen/mpi/src/rtl.cpp (+1309)
- (modified) offload/src/PluginManager.cpp (+7)
- (modified) offload/test/api/omp_device_managed_memory.c (+2)
- (modified) offload/test/api/omp_device_managed_memory_alloc.c (+2)
- (modified) offload/test/libc/host_call.c (+2)
- (modified) offload/test/lit.cfg (+10-2)
- (modified) offload/test/mapping/target_derefence_array_pointrs.cpp (+1)
- (modified) offload/test/mapping/target_has_device_addr.c (+1)
- (modified) offload/test/mapping/target_uses_allocator.c (+1)
- (modified) offload/test/offloading/bug49334.cpp (+1-1)
- (modified) offload/test/offloading/bug64959.c (+1)
- (modified) offload/test/offloading/struct_mapping_with_pointers.cpp (+1)
- (modified) offload/test/offloading/target_critical_region.cpp (+1)
- (modified) offload/test/offloading/thread_limit.c (+1)
- (modified) offload/test/sanitizer/kernel_crash.c (+1)
- (modified) offload/test/sanitizer/kernel_crash_async.c (+1)
- (modified) offload/test/sanitizer/kernel_crash_many.c (+1)
- (modified) offload/test/sanitizer/kernel_crash_single.c (+1)
- (modified) offload/test/sanitizer/kernel_trap.c (+1)
- (modified) offload/test/sanitizer/kernel_trap.cpp (+5-9)
- (modified) offload/test/sanitizer/kernel_trap_async.c (+1)
- (modified) offload/test/sanitizer/kernel_trap_many.c (+1)
``````````diff
diff --git a/offload/CMakeLists.txt b/offload/CMakeLists.txt
index 9b771d1116ee38..e01070cca652df 100644
--- a/offload/CMakeLists.txt
+++ b/offload/CMakeLists.txt
@@ -139,7 +139,7 @@ if(DEFINED LIBOMPTARGET_BUILD_CUDA_PLUGIN OR
message(WARNING "Option removed, use 'LIBOMPTARGET_PLUGINS_TO_BUILD' instead")
endif()
-set(LIBOMPTARGET_ALL_PLUGIN_TARGETS amdgpu cuda host)
+set(LIBOMPTARGET_ALL_PLUGIN_TARGETS mpi amdgpu cuda host)
set(LIBOMPTARGET_PLUGINS_TO_BUILD "all" CACHE STRING
"Semicolon-separated list of plugins to use: cuda, amdgpu, host or \"all\".")
@@ -194,8 +194,10 @@ set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} powerpc64-ibm-linux-g
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} powerpc64-ibm-linux-gnu-LTO")
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} x86_64-unknown-linux-gnu")
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} x86_64-unknown-linux-gnu-LTO")
+set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} x86_64-unknown-linux-gnu-mpi")
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda")
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda-LTO")
+set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda-mpi")
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda-JIT-LTO")
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} s390x-ibm-linux-gnu")
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} s390x-ibm-linux-gnu-LTO")
@@ -341,6 +343,8 @@ set(LIBOMPTARGET_LLVM_LIBRARY_DIR "${LLVM_LIBRARY_DIR}" CACHE STRING
set(LIBOMPTARGET_LLVM_LIBRARY_INTDIR "${LIBOMPTARGET_INTDIR}" CACHE STRING
"Path to folder where intermediate libraries will be output")
+set(LIBOMPTARGET_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
+
# Build offloading plugins and device RTLs if they are available.
add_subdirectory(plugins-nextgen)
add_subdirectory(DeviceRTL)
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index 41cc0f286a581f..75fec516de9b88 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -1208,130 +1208,141 @@ struct GenericPluginTy {
/// Returns non-zero if the \p Image is compatible with the plugin. This
/// function does not require the plugin to be initialized before use.
- int32_t is_plugin_compatible(__tgt_device_image *Image);
+ virtual int32_t is_plugin_compatible(__tgt_device_image *Image);
/// Returns non-zero if the \p Image is compatible with the device.
- int32_t is_device_compatible(int32_t DeviceId, __tgt_device_image *Image);
+ virtual int32_t is_device_compatible(int32_t DeviceId,
+ __tgt_device_image *Image);
/// Returns non-zero if the plugin device has been initialized.
- int32_t is_device_initialized(int32_t DeviceId) const;
+ virtual int32_t is_device_initialized(int32_t DeviceId) const;
/// Initialize the device inside of the plugin.
- int32_t init_device(int32_t DeviceId);
+ virtual int32_t init_device(int32_t DeviceId);
/// Return the number of devices this plugin can support.
- int32_t number_of_devices();
+ virtual int32_t number_of_devices();
/// Returns non-zero if the data can be exchanged between the two devices.
- int32_t is_data_exchangable(int32_t SrcDeviceId, int32_t DstDeviceId);
+ virtual int32_t is_data_exchangable(int32_t SrcDeviceId, int32_t DstDeviceId);
/// Initializes the record and replay mechanism inside the plugin.
- int32_t initialize_record_replay(int32_t DeviceId, int64_t MemorySize,
- void *VAddr, bool isRecord, bool SaveOutput,
- uint64_t &ReqPtrArgOffset);
+ virtual int32_t initialize_record_replay(int32_t DeviceId, int64_t MemorySize,
+ void *VAddr, bool isRecord,
+ bool SaveOutput,
+ uint64_t &ReqPtrArgOffset);
/// Loads the associated binary into the plugin and returns a handle to it.
- int32_t load_binary(int32_t DeviceId, __tgt_device_image *TgtImage,
- __tgt_device_binary *Binary);
+ virtual int32_t load_binary(int32_t DeviceId, __tgt_device_image *TgtImage,
+ __tgt_device_binary *Binary);
/// Allocates memory that is accessively to the given device.
- void *data_alloc(int32_t DeviceId, int64_t Size, void *HostPtr, int32_t Kind);
+ virtual void *data_alloc(int32_t DeviceId, int64_t Size, void *HostPtr,
+ int32_t Kind);
/// Deallocates memory on the given device.
- int32_t data_delete(int32_t DeviceId, void *TgtPtr, int32_t Kind);
+ virtual int32_t data_delete(int32_t DeviceId, void *TgtPtr, int32_t Kind);
/// Locks / pins host memory using the plugin runtime.
- int32_t data_lock(int32_t DeviceId, void *Ptr, int64_t Size,
- void **LockedPtr);
+ virtual int32_t data_lock(int32_t DeviceId, void *Ptr, int64_t Size,
+ void **LockedPtr);
/// Unlocks / unpins host memory using the plugin runtime.
- int32_t data_unlock(int32_t DeviceId, void *Ptr);
+ virtual int32_t data_unlock(int32_t DeviceId, void *Ptr);
/// Notify the runtime about a new mapping that has been created outside.
- int32_t data_notify_mapped(int32_t DeviceId, void *HstPtr, int64_t Size);
+ virtual int32_t data_notify_mapped(int32_t DeviceId, void *HstPtr,
+ int64_t Size);
/// Notify t he runtime about a mapping that has been deleted.
- int32_t data_notify_unmapped(int32_t DeviceId, void *HstPtr);
+ virtual int32_t data_notify_unmapped(int32_t DeviceId, void *HstPtr);
/// Copy data to the given device.
- int32_t data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr,
- int64_t Size);
+ virtual int32_t data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr,
+ int64_t Size);
/// Copy data to the given device asynchronously.
- int32_t data_submit_async(int32_t DeviceId, void *TgtPtr, void *HstPtr,
- int64_t Size, __tgt_async_info *AsyncInfoPtr);
+ virtual int32_t data_submit_async(int32_t DeviceId, void *TgtPtr,
+ void *HstPtr, int64_t Size,
+ __tgt_async_info *AsyncInfoPtr);
/// Copy data from the given device.
- int32_t data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
- int64_t Size);
+ virtual int32_t data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
+ int64_t Size);
/// Copy data from the given device asynchornously.
- int32_t data_retrieve_async(int32_t DeviceId, void *HstPtr, void *TgtPtr,
- int64_t Size, __tgt_async_info *AsyncInfoPtr);
+ virtual int32_t data_retrieve_async(int32_t DeviceId, void *HstPtr,
+ void *TgtPtr, int64_t Size,
+ __tgt_async_info *AsyncInfoPtr);
/// Exchange memory addresses between two devices.
- int32_t data_exchange(int32_t SrcDeviceId, void *SrcPtr, int32_t DstDeviceId,
- void *DstPtr, int64_t Size);
+ virtual int32_t data_exchange(int32_t SrcDeviceId, void *SrcPtr,
+ int32_t DstDeviceId, void *DstPtr,
+ int64_t Size);
/// Exchange memory addresses between two devices asynchronously.
- int32_t data_exchange_async(int32_t SrcDeviceId, void *SrcPtr,
- int DstDeviceId, void *DstPtr, int64_t Size,
- __tgt_async_info *AsyncInfo);
+ virtual int32_t data_exchange_async(int32_t SrcDeviceId, void *SrcPtr,
+ int DstDeviceId, void *DstPtr,
+ int64_t Size,
+ __tgt_async_info *AsyncInfo);
/// Begin executing a kernel on the given device.
- int32_t launch_kernel(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs,
- ptrdiff_t *TgtOffsets, KernelArgsTy *KernelArgs,
- __tgt_async_info *AsyncInfoPtr);
+ virtual int32_t launch_kernel(int32_t DeviceId, void *TgtEntryPtr,
+ void **TgtArgs, ptrdiff_t *TgtOffsets,
+ KernelArgsTy *KernelArgs,
+ __tgt_async_info *AsyncInfoPtr);
/// Synchronize an asyncrhonous queue with the plugin runtime.
- int32_t synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfoPtr);
+ virtual int32_t synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfoPtr);
/// Query the current state of an asynchronous queue.
- int32_t query_async(int32_t DeviceId, __tgt_async_info *AsyncInfoPtr);
+ virtual int32_t query_async(int32_t DeviceId, __tgt_async_info *AsyncInfoPtr);
/// Prints information about the given devices supported by the plugin.
- void print_device_info(int32_t DeviceId);
+ virtual void print_device_info(int32_t DeviceId);
/// Creates an event in the given plugin if supported.
- int32_t create_event(int32_t DeviceId, void **EventPtr);
+ virtual int32_t create_event(int32_t DeviceId, void **EventPtr);
/// Records an event that has occurred.
- int32_t record_event(int32_t DeviceId, void *EventPtr,
- __tgt_async_info *AsyncInfoPtr);
+ virtual int32_t record_event(int32_t DeviceId, void *EventPtr,
+ __tgt_async_info *AsyncInfoPtr);
/// Wait until an event has occurred.
- int32_t wait_event(int32_t DeviceId, void *EventPtr,
- __tgt_async_info *AsyncInfoPtr);
+ virtual int32_t wait_event(int32_t DeviceId, void *EventPtr,
+ __tgt_async_info *AsyncInfoPtr);
/// Syncrhonize execution until an event is done.
- int32_t sync_event(int32_t DeviceId, void *EventPtr);
+ virtual int32_t sync_event(int32_t DeviceId, void *EventPtr);
/// Remove the event from the plugin.
- int32_t destroy_event(int32_t DeviceId, void *EventPtr);
+ virtual int32_t destroy_event(int32_t DeviceId, void *EventPtr);
/// Remove the event from the plugin.
void set_info_flag(uint32_t NewInfoLevel);
/// Creates an asynchronous queue for the given plugin.
- int32_t init_async_info(int32_t DeviceId, __tgt_async_info **AsyncInfoPtr);
+ virtual int32_t init_async_info(int32_t DeviceId,
+ __tgt_async_info **AsyncInfoPtr);
/// Creates device information to be used for diagnostics.
- int32_t init_device_info(int32_t DeviceId, __tgt_device_info *DeviceInfo,
- const char **ErrStr);
+ virtual int32_t init_device_info(int32_t DeviceId,
+ __tgt_device_info *DeviceInfo,
+ const char **ErrStr);
/// Sets the offset into the devices for use by OMPT.
int32_t set_device_identifier(int32_t UserId, int32_t DeviceId);
/// Returns if the plugin can support auotmatic copy.
- int32_t use_auto_zero_copy(int32_t DeviceId);
+ virtual int32_t use_auto_zero_copy(int32_t DeviceId);
/// Look up a global symbol in the given binary.
- int32_t get_global(__tgt_device_binary Binary, uint64_t Size,
- const char *Name, void **DevicePtr);
+ virtual int32_t get_global(__tgt_device_binary Binary, uint64_t Size,
+ const char *Name, void **DevicePtr);
/// Look up a kernel function in the given binary.
- int32_t get_function(__tgt_device_binary Binary, const char *Name,
- void **KernelPtr);
+ virtual int32_t get_function(__tgt_device_binary Binary, const char *Name,
+ void **KernelPtr);
private:
/// Indicates if the platform runtime has been fully initialized.
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index fe296b77c7d557..c72a0770af23cf 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -43,7 +43,7 @@
#endif
// The number of devices in this plugin.
-#define NUM_DEVICES 4
+#define NUM_DEVICES 1
namespace llvm {
namespace omp {
diff --git a/offload/plugins-nextgen/mpi/CMakeLists.txt b/offload/plugins-nextgen/mpi/CMakeLists.txt
new file mode 100644
index 00000000000000..b64b2218048aa8
--- /dev/null
+++ b/offload/plugins-nextgen/mpi/CMakeLists.txt
@@ -0,0 +1,134 @@
+# Looking for MPI...
+find_package(MPI QUIET)
+
+if(NOT(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(ppc64le)$" AND CMAKE_SYSTEM_NAME MATCHES "Linux"))
+ message(STATUS "Not building MPI offloading plugin: only support MPI in Linux x86_64 or ppc64le hosts.")
+ return()
+elseif(NOT MPI_CXX_FOUND)
+ message(STATUS "Not building MPI offloading plugin: MPI not found in system.")
+ return()
+endif()
+
+message(STATUS "Building MPI Proxy offloading plugin.")
+
+# Event System
+add_subdirectory(event_system)
+
+# MPI Plugin
+
+# Create the library and add the default arguments.
+add_target_library(omptarget.rtl.mpi MPI)
+
+target_sources(omptarget.rtl.mpi PRIVATE
+ src/rtl.cpp
+)
+
+target_link_libraries(omptarget.rtl.mpi PRIVATE
+ EventSystem
+)
+
+# Add include directories
+target_include_directories(omptarget.rtl.mpi PRIVATE
+ ${LIBOMPTARGET_INCLUDE_DIR})
+
+# Set C++20 as the target standard for this plugin.
+set_target_properties(omptarget.rtl.mpi
+ PROPERTIES
+ CXX_STANDARD 20
+ CXX_STANDARD_REQUIRED ON)
+
+
+# Configure testing for the MPI plugin.
+list(APPEND LIBOMPTARGET_TESTED_PLUGINS "omptarget.rtl.mpi")
+# Report to the parent scope that we are building a plugin for MPI.
+set(LIBOMPTARGET_TESTED_PLUGINS "${LIBOMPTARGET_TESTED_PLUGINS}" PARENT_SCOPE)
+
+# Define the target specific triples and ELF machine values.
+set(LIBOMPTARGET_SYSTEM_TARGETS
+ "${LIBOMPTARGET_SYSTEM_TARGETS} x86_64-pc-linux-gnu-mpi nvptx64-nvidia-cuda-mpi" PARENT_SCOPE)
+
+# Remote Plugin Manager
+message(STATUS "Building the llvm-offload-mpi-proxy-device")
+
+set(LIBOMPTARGET_ALL_REMOTE_PLUGIN_TARGETS amdgpu cuda host)
+set(LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD "all" CACHE STRING
+ "Semicolon-separated list of plugins to use: cuda, amdgpu, host or \"all\".")
+
+if(LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD STREQUAL "all")
+ set(LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD ${LIBOMPTARGET_ALL_REMOTE_PLUGIN_TARGETS})
+endif()
+
+if(NOT CMAKE_SYSTEM_NAME MATCHES "Linux" AND
+ "host" IN_LIST LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD)
+ message(STATUS "Not building remote host plugin: only Linux systems are supported")
+ list(REMOVE_ITEM LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD "host")
+endif()
+if(NOT (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(ppc64le)|(aarch64)$"
+ AND CMAKE_SYSTEM_NAME MATCHES "Linux"))
+ if("amdgpu" IN_LIST LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD)
+ message(STATUS "Not building remote AMDGPU plugin: only support AMDGPU in "
+ "Linux x86_64, ppc64le, or aarch64 hosts")
+ list(REMOVE_ITEM LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD "amdgpu")
+ endif()
+ if("cuda" IN_LIST LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD)
+ message(STATUS "Not building remote CUDA plugin: only support CUDA in "
+ "Linux x86_64, ppc64le, or aarch64 hosts")
+ list(REMOVE_ITEM LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD "cuda")
+ endif()
+endif()
+if("mpi" IN_LIST LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD)
+ message(STATUS "It is not possible to build the mpi plugin inside "
+ "the remote proxy device")
+ list(REMOVE_ITEM LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD "mpi")
+endif()
+
+message(STATUS "Building the MPI Plugin with support for remote offloading to "
+ "the \"${LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD}\" plugins")
+
+set(REMOTE_MPI_ENUM_PLUGIN_TARGETS "")
+foreach(plugin IN LISTS LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD)
+ set(REMOTE_MPI_ENUM_PLUGIN_TARGETS
+ "${REMOTE_MPI_ENUM_PLUGIN_TARGETS}PLUGIN_TARGET(${plugin})\n")
+endforeach()
+string(STRIP ${REMOTE_MPI_ENUM_PLUGIN_TARGETS} REMOTE_MPI_ENUM_PLUGIN_TARGETS)
+configure_file(
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/RemoteTargets.def.in
+ ${LIBOMPTARGET_BINARY_INCLUDE_DIR}/Shared/RemoteTargets.def
+)
+
+llvm_add_tool(OPENMP llvm-offload-mpi-proxy-device
+ src/ProxyDevice.cpp
+ src/RemotePluginManager.cpp
+ ${LIBOMPTARGET_SRC_DIR}/OpenMP/OMPT/Callback.cpp
+)
+
+llvm_update_compile_flags(llvm-offload-mpi-proxy-device)
+
+target_link_libraries(llvm-offload-mpi-proxy-device PRIVATE
+ EventSystem
+ LLVMSupport
+ omp
+)
+
+add_dependencies(llvm-offload-mpi-proxy-device omp)
+
+target_include_directories(llvm-offload-mpi-proxy-device PRIVATE
+ ${LIBOMPTARGET_INCLUDE_DIR}
+ ${LIBOMPTARGET_LLVM_INCLUDE_DIRS}
+ ${LIBOMPTARGET_BINARY_INCLUDE_DIR}
+)
+
+foreach(plugin IN LISTS LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD)
+ target_link_libraries(llvm-offload-mpi-proxy-device PRIVATE omptarget.rtl.${plugin})
+ add_dependencies(llvm-offload-mpi-proxy-device omptarget.rtl.${plugin})
+endforeach()
+
+# Set C++20 as the target standard for this plugin.
+set_target_properties(llvm-offload-mpi-proxy-device
+ PROPERTIES
+ CXX_STANDARD 20
+ CXX_STANDARD_REQUIRED ON)
+
+target_compile_definitions(llvm-offload-mpi-proxy-device PRIVATE
+ TARGET_NAME=llvm-offload-mpi-proxy-device
+ DEBUG_PREFIX="MPIProxyDevice")
diff --git a/offload/plugins-nextgen/mpi/event_system/CMakeLists.txt b/offload/plugins-nextgen/mpi/event_system/CMakeLists.txt
new file mode 100644
index 00000000000000..32a9f9b79423e1
--- /dev/null
+++ b/offload/plugins-nextgen/mpi/event_system/CMakeLists.txt
@@ -0,0 +1,29 @@
+# Build EventSystem
+add_library(EventSystem OBJECT
+ EventSystem.cpp
+)
+
+target_include_directories(EventSystem PUBLIC
+ ${CMAKE_CURRENT_SOURCE_DIR}
+ ${LIBOMPTARGET_BINARY_INCLUDE_DIR}
+ ${LIBOMPTARGET_INCLUDE_DIR}
+)
+
+target_link_libraries(EventSystem PRIVATE
+ MPI::MPI_CXX
+ LLVMSupport
+)
+
+target_compile_options(EventSystem PUBLIC ${offload_compile_flags})
+target_link_options(EventSystem PUBLIC ${offload_link_flags})
+
+set_target_properties(EventSystem PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+# Set C++20 as the target standard for this plugin.
+set_target_properties(EventSystem
+ PROPERTIES
+ CXX_STANDARD 20
+ CXX_STANDARD_REQUIRED ON)
+
+target_compile_definitions(EventSystem PRIVATE
+ DEBUG_PREFIX="EventSystem")
\ No newline at end of file
diff --git a/offload/plugins-nextgen/mpi/event_system/EventSystem.cpp b/offload/plugins-nextgen/mpi/event_system/EventSystem.cpp
new file mode 100644
index 00000000000000..ab59e3da837fa5
--- /dev/null
+++ b/offload/plugins-nextgen/mpi/event_system/EventSystem.cpp
@@ -0,0 +1,848 @@
+//===------ event_system.cpp - Concurrent MPI communication -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the MPI Event System used by the MPI
+// target runtime for concurrent communication.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EventSystem.h"
+
+#include <algorithm>
+#include <chrono>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <functional>
+#include <memory>
+
+#include <mpi.h>
+#include <unistd.h>
+
+#include "Shared/APITypes.h"
+#include "Shared/Debug.h"
+#include "Shared/EnvironmentVar.h"
+#include "Shared/Utils.h"
+#include "omptarget.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Error.h"
+
+#include "llvm/Support/DynamicLibrary.h"
+
+#define CHECK(expr, msg, ...) ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/114574
More information about the llvm-commits mailing list