[llvm-branch-commits] [llvm] [offload] Add `libacctarget` OpenACC runtime (PR #198103)
Ivan R. Ivanov via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat May 16 08:13:29 PDT 2026
https://github.com/ivanradanov created https://github.com/llvm/llvm-project/pull/198103
The implementation is subject to change.
---
<sub>Stack created with <a href="https://github.com/github/gh-stack">GitHub Stacks CLI</a> • <a href="https://gh.io/stacks-feedback">Give Feedback 💬</a></sub>
>From 080b7442260fd2e80cd8c1b71e278863d1807f39 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <iivanov at nvidia.com>
Date: Sat, 16 May 2026 07:08:01 -0700
Subject: [PATCH] [offload] Add `libacctarget` OpenACC runtime
The implementation is subject to change.
---
offload/CMakeLists.txt | 1 +
offload/libacctarget/CMakeLists.txt | 75 +
.../libacctarget/CfiDataRuntimeInterface.cpp | 297 +++
offload/libacctarget/DataRuntimeInterface.cpp | 204 ++
offload/libacctarget/Debug.h | 24 +
offload/libacctarget/DeviceManager.cpp | 231 ++
offload/libacctarget/DeviceManager.h | 75 +
offload/libacctarget/Interface.cpp | 2043 +++++++++++++++++
offload/libacctarget/Interface.h | 270 +++
offload/libacctarget/Logger.h | 47 +
offload/libacctarget/Private.h | 29 +
offload/libacctarget/QueueManager.cpp | 179 ++
offload/libacctarget/QueueManager.h | 77 +
offload/libacctarget/RuntimeImpl.cpp | 175 ++
offload/libacctarget/RuntimeInterface.cpp | 248 ++
offload/libacctarget/exports | 182 ++
offload/libacctarget/include/openacc.h | 46 +
17 files changed, 4203 insertions(+)
create mode 100644 offload/libacctarget/CMakeLists.txt
create mode 100644 offload/libacctarget/CfiDataRuntimeInterface.cpp
create mode 100644 offload/libacctarget/DataRuntimeInterface.cpp
create mode 100644 offload/libacctarget/Debug.h
create mode 100644 offload/libacctarget/DeviceManager.cpp
create mode 100644 offload/libacctarget/DeviceManager.h
create mode 100644 offload/libacctarget/Interface.cpp
create mode 100644 offload/libacctarget/Interface.h
create mode 100644 offload/libacctarget/Logger.h
create mode 100644 offload/libacctarget/Private.h
create mode 100644 offload/libacctarget/QueueManager.cpp
create mode 100644 offload/libacctarget/QueueManager.h
create mode 100644 offload/libacctarget/RuntimeImpl.cpp
create mode 100644 offload/libacctarget/RuntimeInterface.cpp
create mode 100644 offload/libacctarget/exports
create mode 100644 offload/libacctarget/include/openacc.h
diff --git a/offload/CMakeLists.txt b/offload/CMakeLists.txt
index e63e6bdfc03e3..bcf03b39bc7ca 100644
--- a/offload/CMakeLists.txt
+++ b/offload/CMakeLists.txt
@@ -299,6 +299,7 @@ add_subdirectory(docs)
# Build target agnostic offloading library.
add_subdirectory(libompaccsupport)
add_subdirectory(libomptarget)
+add_subdirectory(libacctarget)
add_subdirectory(liboffload)
# Add tests.
diff --git a/offload/libacctarget/CMakeLists.txt b/offload/libacctarget/CMakeLists.txt
new file mode 100644
index 0000000000000..709b1b4789efc
--- /dev/null
+++ b/offload/libacctarget/CMakeLists.txt
@@ -0,0 +1,75 @@
+message(STATUS "Building OpenACC offloading runtime library libacctarget.")
+
+set(ACCTARGET_SRC
+ Interface.cpp
+
+ DeviceManager.cpp
+ QueueManager.cpp
+
+ RuntimeImpl.cpp
+ RuntimeInterface.cpp
+ DataRuntimeInterface.cpp
+ CfiDataRuntimeInterface.cpp
+
+)
+
+add_llvm_library(acctarget
+ SHARED
+
+ ${ACCTARGET_SRC}
+
+ ADDITIONAL_HEADER_DIRS
+ ${LIBOMPTARGET_INCLUDE_DIR}
+ ${LIBOMPTARGET_BINARY_INCLUDE_DIR}
+
+ LINK_COMPONENTS
+ Support
+
+ LINK_LIBS
+ PUBLIC
+ ompaccsupport
+
+ NO_INSTALL_RPATH
+ BUILDTREE_ONLY
+)
+
+target_include_directories(acctarget PRIVATE
+ ${LIBOMPTARGET_INCLUDE_DIR}
+ ${LIBOMPTARGET_BINARY_INCLUDE_DIR}
+ ${CMAKE_CURRENT_SOURCE_DIR}/include
+)
+
+# When the linker supports version scripts, pass the `exports` file next to this
+# CMakeLists.txt as the version script for the shared library. Registering it
+# in LINK_DEPENDS makes an edit to `exports` trigger a relink.
+if(LLVM_HAVE_LINK_VERSION_SCRIPT)
+ target_link_libraries(acctarget PRIVATE "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports")
+ set_property(TARGET acctarget APPEND PROPERTY LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/exports)
+endif()
+
+add_dependencies(acctarget PluginErrcodes)
+
+target_compile_definitions(acctarget PRIVATE
+ TARGET_NAME=acctarget
+ DEBUG_PREFIX="acctarget"
+)
+
+target_link_libraries(acctarget PRIVATE ompaccsupport)
+
+target_compile_options(acctarget PRIVATE ${offload_compile_flags})
+target_link_options(acctarget PRIVATE ${offload_link_flags})
+
+# Link against flang_rt.runtime for Fortran descriptor support.
+# flang_rt.runtime is a sibling runtime; link against the shared library target.
+if(TARGET flang_rt.runtime.dynamic)
+ target_link_libraries(acctarget PRIVATE flang_rt.runtime.dynamic)
+elseif(TARGET flang_rt.runtime.static)
+ target_link_libraries(acctarget PRIVATE flang_rt.runtime.static)
+else()
+ message(FATAL_ERROR "flang_rt.runtime target not found")
+endif()
+
+# libacctarget.so needs to be aware of where the plugins live as they
+# are now separated in the build directory.
+set_target_properties(acctarget PROPERTIES
+ POSITION_INDEPENDENT_CODE ON
+ INSTALL_RPATH "$ORIGIN"
+ BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/..")
+install(TARGETS acctarget LIBRARY COMPONENT acctarget DESTINATION "${OFFLOAD_INSTALL_LIBDIR}")
diff --git a/offload/libacctarget/CfiDataRuntimeInterface.cpp b/offload/libacctarget/CfiDataRuntimeInterface.cpp
new file mode 100644
index 0000000000000..46d75f111457c
--- /dev/null
+++ b/offload/libacctarget/CfiDataRuntimeInterface.cpp
@@ -0,0 +1,297 @@
+//===- CfiDataRuntimeInterface.cpp ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DeviceManager.h"
+#include "Logger.h"
+#include "Private.h"
+#include "omptarget.h"
+
+#include "Interface.h"
+#include "include/openacc.h"
+
+using namespace llvm::acc::target;
+using namespace llvm::acc::target::debug;
+
+// Common prologue of the descriptor-based entry points in this file. It expects
+// a descriptor pointer named `Desc` in the enclosing scope, runs FUNC_LOGGER()
+// and declares the single-element argument arrays (AccDataDescs, ArgPtrs,
+// ArgBasePtrs, ArgSizes, ArgTypes) that are passed to the __tgt_acc_data_*
+// calls. ArgTypes[0] starts out as TGT_ACC_MAPTYPE_NONE so that an entry point
+// can overwrite it after invoking the macro.
+#define PREAMBLE() \
+ FUNC_LOGGER(); \
+ AccDataDescF18 AccDesc{{TGT_ACC_DESC_F18}, &Desc->raw()}; \
+ AccDataDescF18 *AccDataDescs[] = {&AccDesc}; \
+ void *ArgPtrs[] = {reinterpret_cast<void *>(&Desc->raw())}; \
+ void *ArgBasePtrs[] = {nullptr}; \
+ int64_t ArgSizes[] = {0}; \
+ int64_t ArgTypes[] = {TGT_ACC_MAPTYPE_NONE};
+
+extern "C" {
+int _cfi_acc_is_present_a(const Fortran::runtime::Descriptor *Desc) {
+ return accIsPresent(Desc->OffsetElement());
+}
+
+int _cfi_acc_create_a(Fortran::runtime::Descriptor *Desc) {
+ PREAMBLE();
+ __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+ return 0;
+}
+int _cfi_acc_create_async_a(Fortran::runtime::Descriptor *Desc, int *Async) {
+ PREAMBLE();
+ __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+ return 0;
+}
+int _cfi_acc_pcreate_a(Fortran::runtime::Descriptor *Desc) {
+ PREAMBLE();
+ __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+ return 0;
+}
+int _cfi_acc_pcreate_async_a(Fortran::runtime::Descriptor *Desc, int *Async) {
+ PREAMBLE();
+ __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+ return 0;
+}
+int _cfi_acc_present_or_create_a(Fortran::runtime::Descriptor *Desc) {
+ PREAMBLE();
+ __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+ return 0;
+}
+int _cfi_acc_present_or_create_async_a(Fortran::runtime::Descriptor *Desc,
+ int *Async) {
+ PREAMBLE();
+ __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+ return 0;
+}
+
+int _cfi_acc_delete_a(Fortran::runtime::Descriptor *Desc) {
+ PREAMBLE();
+ __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+ return 0;
+}
+int _cfi_acc_delete_async_a(Fortran::runtime::Descriptor *Desc, int *Async) {
+ PREAMBLE();
+ __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+ return 0;
+}
+int _cfi_acc_delete_finalize_a(Fortran::runtime::Descriptor *Desc) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FINALIZE;
+ __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+ return 0;
+}
+int _cfi_acc_delete_finalize_async_a(Fortran::runtime::Descriptor *Desc,
+ int *Async) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FINALIZE;
+ __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+ return 0;
+}
+
+int _cfi_acc_copyin_a(Fortran::runtime::Descriptor *Desc) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+ __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+ return 0;
+}
+int _cfi_acc_copyin_async_a(Fortran::runtime::Descriptor *Desc, int *Async) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+ __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+ return 0;
+}
+int _cfi_acc_pcopyin_a(Fortran::runtime::Descriptor *Desc) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+ __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+ return 0;
+}
+int _cfi_acc_pcopyin_async_a(Fortran::runtime::Descriptor *Desc, int *Async) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+ __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+ return 0;
+}
+int _cfi_acc_present_or_copyin_a(Fortran::runtime::Descriptor *Desc) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+ __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+ return 0;
+}
+int _cfi_acc_present_or_copyin_async_a(Fortran::runtime::Descriptor *Desc,
+ int *Async) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+ __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+ return 0;
+}
+
+int _cfi_acc_copyout_a(Fortran::runtime::Descriptor *Desc) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+ __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+ return 0;
+}
+int _cfi_acc_copyout_async_a(Fortran::runtime::Descriptor *Desc, int *Async) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+ __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+ return 0;
+}
+
+int _cfi_acc_copyout_finalize_a(Fortran::runtime::Descriptor *Desc) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FROM | TGT_ACC_MAPTYPE_FINALIZE;
+ __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+ return 0;
+}
+int _cfi_acc_copyout_finalize_async_a(Fortran::runtime::Descriptor *Desc,
+ int *Async) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FROM | TGT_ACC_MAPTYPE_FINALIZE;
+ __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+ return 0;
+}
+
+int _cfi_acc_update_device_a(Fortran::runtime::Descriptor *Desc) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+ __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+ return 0;
+}
+int _cfi_acc_update_device_async_a(Fortran::runtime::Descriptor *Desc,
+ int *Async) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+ __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+ return 0;
+}
+
+int _cfi_acc_updatein_a(Fortran::runtime::Descriptor *Desc) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+ __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+ return 0;
+}
+int _cfi_acc_updatein_async_a(Fortran::runtime::Descriptor *Desc, int *Async) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+ __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+ return 0;
+}
+
+int _cfi_acc_update_self_a(Fortran::runtime::Descriptor *Desc) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+ __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+ return 0;
+}
+int _cfi_acc_update_self_async_a(Fortran::runtime::Descriptor *Desc,
+ int *Async) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+ __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+ return 0;
+}
+
+int _cfi_acc_update_host_a(Fortran::runtime::Descriptor *Desc) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+ __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+ return 0;
+}
+int _cfi_acc_update_host_async_a(Fortran::runtime::Descriptor *Desc,
+ int *Async) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+ __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+ return 0;
+}
+
+int _cfi_acc_updateout_a(Fortran::runtime::Descriptor *Desc) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+ __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+ return 0;
+}
+int _cfi_acc_updateout_async_a(Fortran::runtime::Descriptor *Desc, int *Async) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+ __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+ return 0;
+}
+}
diff --git a/offload/libacctarget/DataRuntimeInterface.cpp b/offload/libacctarget/DataRuntimeInterface.cpp
new file mode 100644
index 0000000000000..e6111932e8e77
--- /dev/null
+++ b/offload/libacctarget/DataRuntimeInterface.cpp
@@ -0,0 +1,204 @@
+//===- DataRuntimeInterface.cpp ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Logger.h"
+#include "Private.h"
+
+#include "Interface.h"
+#include "include/openacc.h"
+
+using namespace llvm::acc::target;
+using namespace llvm::acc::target::debug;
+
+// Common prologue of the pointer-based entry points in this file. It expects
+// `Ptr` and `Bytes` in the enclosing scope, runs FUNC_LOGGER() and declares the
+// single-element argument arrays (AccDataDescs, ArgPtrs, ArgBasePtrs, ArgSizes,
+// ArgTypes) that are passed to the __tgt_acc_data_* calls. No descriptor is
+// attached (AccDataDescs[0] is null) and ArgTypes[0] starts out as
+// TGT_ACC_MAPTYPE_NONE so that an entry point can overwrite it afterwards.
+#define PREAMBLE() \
+ FUNC_LOGGER(); \
+ AccDataDescF18 *AccDataDescs[] = {nullptr}; \
+ void *ArgPtrs[] = {Ptr}; \
+ void *ArgBasePtrs[] = {nullptr}; \
+ int64_t ArgSizes[] = {static_cast<int64_t>(Bytes)}; \
+ int64_t ArgTypes[] = {TGT_ACC_MAPTYPE_NONE};
+
+extern "C" {
+int acc_is_present(void *Ptr) { return accIsPresent(Ptr); }
+
+void *acc_create(void *Ptr, size_t Bytes) {
+ return accDataEnter(nullptr, Ptr, Bytes, TGT_ACC_MAPTYPE_NONE,
+ acc_async_sync);
+}
+/// Asynchronous counterpart of acc_create(): enters \p Bytes bytes starting at
+/// \p Ptr with map type TGT_ACC_MAPTYPE_NONE, forwarding \p Async as the async
+/// argument of accDataEnter().
+void acc_create_async(void *Ptr, size_t Bytes, int Async) {
+  // Pass the caller's async value through. Hard-coding acc_async_sync here
+  // made this entry point behave exactly like the synchronous acc_create().
+  accDataEnter(nullptr, Ptr, Bytes, TGT_ACC_MAPTYPE_NONE, Async);
+}
+void *acc_pcreate(void *Ptr, size_t Bytes) { return acc_create(Ptr, Bytes); }
+void acc_pcreate_async(void *Ptr, size_t Bytes, int Async) {
+ acc_create_async(Ptr, Bytes, Async);
+}
+void *acc_present_or_create(void *Ptr, size_t Bytes) {
+ return acc_create(Ptr, Bytes);
+}
+void acc_present_or_create_async(void *Ptr, size_t Bytes, int Async) {
+ acc_create_async(Ptr, Bytes, Async);
+}
+
+void acc_delete(void *Ptr, size_t Bytes) {
+ PREAMBLE();
+ __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+}
+void acc_delete_async(void *Ptr, size_t Bytes, int Async) {
+ PREAMBLE();
+ __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), Async);
+}
+void acc_delete_finalize(void *Ptr, size_t Bytes) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FINALIZE;
+ __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+}
+void acc_delete_finalize_async(void *Ptr, size_t Bytes, int Async) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FINALIZE;
+ __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), Async);
+}
+
+void *acc_copyin(void *Ptr, size_t Bytes) {
+ return accDataEnter(nullptr, Ptr, Bytes, TGT_ACC_MAPTYPE_TO, acc_async_sync);
+}
+/// Asynchronous counterpart of acc_copyin(): enters \p Bytes bytes starting at
+/// \p Ptr with map type TGT_ACC_MAPTYPE_TO, forwarding \p Async as the async
+/// argument of accDataEnter().
+void acc_copyin_async(void *Ptr, size_t Bytes, int Async) {
+  // Pass the caller's async value through. Hard-coding acc_async_sync here
+  // made this entry point behave exactly like the synchronous acc_copyin().
+  accDataEnter(nullptr, Ptr, Bytes, TGT_ACC_MAPTYPE_TO, Async);
+}
+void *acc_pcopyin(void *Ptr, size_t Bytes) { return acc_copyin(Ptr, Bytes); }
+void acc_pcopyin_async(void *Ptr, size_t Bytes, int Async) {
+ acc_copyin_async(Ptr, Bytes, Async);
+}
+void *acc_present_or_copyin(void *Ptr, size_t Bytes) {
+ return acc_copyin(Ptr, Bytes);
+}
+void acc_present_or_copyin_async(void *Ptr, size_t Bytes, int Async) {
+ acc_copyin_async(Ptr, Bytes, Async);
+}
+
+void acc_copyout(void *Ptr, size_t Bytes) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+ __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+}
+void acc_copyout_async(void *Ptr, size_t Bytes, int Async) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+ __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), Async);
+}
+
+void acc_copyout_finalize(void *Ptr, size_t Bytes) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FROM | TGT_ACC_MAPTYPE_FINALIZE;
+ __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+}
+void acc_copyout_finalize_async(void *Ptr, size_t Bytes, int Async) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FROM | TGT_ACC_MAPTYPE_FINALIZE;
+ __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), Async);
+}
+
+void acc_update_device(void *Ptr, size_t Bytes) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+ __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+}
+void acc_update_device_async(void *Ptr, size_t Bytes, int Async) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+ __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), Async);
+}
+
+void acc_updatein(void *Ptr, size_t Bytes) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+ __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+}
+void acc_updatein_async(void *Ptr, size_t Bytes, int Async) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+ __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), Async);
+}
+
+void acc_update_self(void *Ptr, size_t Bytes) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+ __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+}
+void acc_update_self_async(void *Ptr, size_t Bytes, int Async) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+ __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), Async);
+}
+
+void acc_update_host(void *Ptr, size_t Bytes) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+ __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+}
+void acc_update_host_async(void *Ptr, size_t Bytes, int Async) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+ __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), Async);
+}
+
+void acc_updateout(void *Ptr, size_t Bytes) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+ __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs),
+ acc_async_sync);
+}
+void acc_updateout_async(void *Ptr, size_t Bytes, int Async) {
+ PREAMBLE();
+ ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+ __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+ ArgSizes, ArgTypes, nullptr, nullptr,
+ reinterpret_cast<AccDataDesc **>(AccDataDescs), Async);
+}
+}
diff --git a/offload/libacctarget/Debug.h b/offload/libacctarget/Debug.h
new file mode 100644
index 0000000000000..917b1f6ce6e2e
--- /dev/null
+++ b/offload/libacctarget/Debug.h
@@ -0,0 +1,24 @@
+//===- Debug.h --------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// The guard follows the LLVM_ACC_OFFLOAD_* scheme used by DeviceManager.h.
+// Identifiers that start with an underscore followed by an upper-case letter
+// are reserved for the implementation, so the previous guard name is avoided.
+#ifndef LLVM_ACC_OFFLOAD_DEBUG_H_
+#define LLVM_ACC_OFFLOAD_DEBUG_H_
+
+namespace llvm::acc::target::debug {
+
+// Debug types to use in libacctarget.
+constexpr const char *ADT_Init = "ACCInit";
+constexpr const char *ADT_Mapping = "ACCMapping";
+constexpr const char *ADT_Descriptor = "ACCDescriptor";
+constexpr const char *ADT_Queue = "ACCQueue";
+constexpr const char *ADT_Interface = "ACCInterface";
+constexpr const char *ADT_Kernel = "ACCKernel";
+
+} // namespace llvm::acc::target::debug
+
+#endif // LLVM_ACC_OFFLOAD_DEBUG_H_
diff --git a/offload/libacctarget/DeviceManager.cpp b/offload/libacctarget/DeviceManager.cpp
new file mode 100644
index 0000000000000..d546c035f8331
--- /dev/null
+++ b/offload/libacctarget/DeviceManager.cpp
@@ -0,0 +1,231 @@
+//===- DeviceManager.cpp ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DeviceManager.h"
+#include "PluginManager.h"
+#include "openacc.h"
+
+// OpenACC 3.4, sec. 2.3.1 "Modifying and Retrieving ICV Values"
+// Each host thread needs its own value, thus these are `thread_local`.
+//
+// The DeviceManager owns these ICVs and they should not be accessible outside
+// and are thus static.
+namespace llvm::acc::target::icv {
+/// OpenACC 3.4, sec. 2.3 "Internal Control Variables"
+/// "acc-current-device-num-var - controls which device of the selected type is
+/// used."
+/// TODO can we use PerThreadTable here?
+static thread_local std::array<DeviceManagerTy::DeviceIdTy,
+ AccDeviceNumConcreteTypes>
+ AccCurrentDeviceNumVar = {0};
+/// OpenACC 3.4, sec. 2.3 "Internal Control Variables"
+/// "acc-current-device-type-var - controls which type of device is used."
+static thread_local acc_device_t AccCurrentDeviceTypeVar = acc_device_default;
+/// The device type to use when the default is asked for. Initially we set it to
+/// none. When the plugins get initialized we will set the default to one of the
+/// target device types we have available.
+static acc_device_t AccCurrentDefaultDeviceTypeVar = acc_device_none;
+
+} // namespace llvm::acc::target::icv
+
+namespace llvm::acc::target {
+DeviceManagerTy *DM = nullptr;
+} // namespace llvm::acc::target
+
+using namespace llvm::acc::target;
+
+/// Returns a printable name for \p DeviceType. nvidia/amd/spirv map to their
+/// bare name, the other known enumerators to a bracketed name, and any other
+/// value to "<unknown>".
+static const char *accDeviceToStr(acc_device_t DeviceType) {
+  const char *Name = "<unknown>";
+  switch (DeviceType) {
+  case acc_device_nvidia:
+    Name = "nvidia";
+    break;
+  case acc_device_amd:
+    Name = "amd";
+    break;
+  case acc_device_spirv:
+    Name = "spirv";
+    break;
+  case acc_device_none:
+    Name = "<none>";
+    break;
+  case acc_device_default:
+    Name = "<default>";
+    break;
+  case acc_device_host:
+    Name = "<host>";
+    break;
+  case acc_device_not_host:
+    Name = "<not_host>";
+    break;
+  default:
+    break;
+  }
+  return Name;
+}
+
+/// Streams \p DeviceType as its accDeviceToStr() name followed by the numeric
+/// enum value in parentheses.
+// NOTE(review): this definition is `inline` and lives in a .cpp file, while
+// DeviceManager.h only declares the operator (also `inline`). Other translation
+// units that stream an acc_device_t would not see a definition - confirm this
+// is intended.
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+ acc_device_t DeviceType) {
+ return OS << accDeviceToStr(DeviceType) << " ("
+ << static_cast<int>(DeviceType) << ")";
+}
+
+/// Returns the table for \p DeviceType that refreshDeviceMapping() fills with
+/// PluginManager device ids. PMDeviceMap has one entry per concrete device
+/// type, so the enum value is rebased by AccDeviceTypeOffset before indexing.
+DeviceManagerTy::SingleDeviceTypeMapTy &
+DeviceManagerTy::getSingleDeviceTypeMap(acc_device_t DeviceType) {
+  const int Slot = DeviceType - AccDeviceTypeOffset;
+  return PMDeviceMap[Slot];
+}
+
+void DeviceManagerTy::init() {
+ refreshDeviceMapping(/*UpdateDeviceType=*/true);
+}
+
+void DeviceManagerTy::deinit() {}
+
+/// Rebuilds the per-device-type tables of PluginManager device ids from the
+/// devices currently known to the PluginManager.
+///
+/// \param UpdateDeviceType when true, icv::AccCurrentDefaultDeviceTypeVar is
+/// set to the first device type, in the order nvidia, amd, spirv, host, that
+/// has at least one device.
+void DeviceManagerTy::refreshDeviceMapping(bool UpdateDeviceType) {
+  // Drop the stale table of *every* concrete device type. The previous loop
+  // reset the nvidia table on each iteration, so the amd and spirv tables kept
+  // accumulating entries across refreshes.
+  for (int DeviceTypeInt = acc_device_concrete_type_begin;
+       DeviceTypeInt < acc_device_concrete_type_end; DeviceTypeInt++)
+    getSingleDeviceTypeMap(static_cast<acc_device_t>(DeviceTypeInt)).clear();
+
+  // Sort the PluginManager devices into the tables by target architecture.
+  auto ExclusiveDevicesAccessor = PM->getExclusiveDevicesAccessor();
+  for (DeviceTy &Device : PM->devices(ExclusiveDevicesAccessor)) {
+    if (Device.RTL->getTripleArch() == llvm::Triple::nvptx64) {
+      getSingleDeviceTypeMap(acc_device_nvidia).push_back(Device.DeviceID);
+    } else if (Device.RTL->getTripleArch() == llvm::Triple::amdgcn) {
+      getSingleDeviceTypeMap(acc_device_amd).push_back(Device.DeviceID);
+    } else if (Device.RTL->getTripleArch() == llvm::Triple::spirv64) {
+      getSingleDeviceTypeMap(acc_device_spirv).push_back(Device.DeviceID);
+    }
+  }
+
+  ODBG() << "Refreshed OpenACC devices:";
+  for (int DeviceTypeInt = acc_device_concrete_type_begin;
+       DeviceTypeInt < acc_device_concrete_type_end; DeviceTypeInt++) {
+    acc_device_t DeviceType = static_cast<acc_device_t>(DeviceTypeInt);
+    unsigned Num = getSingleDeviceTypeMap(DeviceType).size();
+    ODBG() << " Type " << DeviceType;
+    for (unsigned I = 0; I < Num; I++) {
+      ODBG() << " OpenACC Device #" << I << " -> PM Device #"
+             << getSingleDeviceTypeMap(DeviceType)[I];
+    }
+  }
+
+  if (!UpdateDeviceType)
+    return;
+
+  // Set the default current device type to a device we have available in the
+  // below order of priority.
+  auto CheckType = [&](acc_device_t Type) {
+    // Only concrete device types own a table in PMDeviceMap. Reject anything
+    // else before indexing so a non-concrete candidate cannot read out of
+    // bounds.
+    if (Type < acc_device_concrete_type_begin ||
+        Type >= acc_device_concrete_type_end)
+      return false;
+    if (getSingleDeviceTypeMap(Type).empty())
+      return false;
+    ODBG() << "Updating AccCurrentDefaultDeviceTypeVar to " << Type;
+    icv::AccCurrentDefaultDeviceTypeVar = Type;
+    return true;
+  };
+  const acc_device_t Priority[] = {acc_device_nvidia, acc_device_amd,
+                                   acc_device_spirv, acc_device_host};
+  for (acc_device_t Type : Priority)
+    if (CheckType(Type))
+      break;
+}
+
+/// Returns the calling thread's acc-current-device-num-var entry for
+/// \p DeviceType.
+///
+/// icv::AccCurrentDeviceNumVar has one entry per concrete device type, so the
+/// enum value has to be rebased by AccDeviceTypeOffset, the same way
+/// getSingleDeviceTypeMap() indexes PMDeviceMap. Indexing with the raw enum
+/// value runs past the end of the array whenever the offset is not zero.
+/// acc_device_default is resolved to icv::AccCurrentDefaultDeviceTypeVar first.
+static DeviceManagerTy::DeviceIdTy &
+getCurrentDeviceNumVar(acc_device_t DeviceType) {
+  if (DeviceType == acc_device_default)
+    DeviceType = icv::AccCurrentDefaultDeviceTypeVar;
+  assert(DeviceType >= acc_device_concrete_type_begin &&
+         DeviceType < acc_device_concrete_type_end &&
+         "acc-current-device-num-var only exists for concrete device types");
+  return icv::AccCurrentDeviceNumVar[DeviceType - AccDeviceTypeOffset];
+}
+
+/// Returns the PluginManager device id of the current device of
+/// \p DeviceType. acc_device_none selects the current device type and
+/// acc_device_default selects icv::AccCurrentDefaultDeviceTypeVar.
+int DeviceManagerTy::getPMDeviceId(acc_device_t DeviceType) {
+  ODBG() << "Getting device for " << DeviceType;
+  if (DeviceType == acc_device_none) {
+    DeviceType = icv::AccCurrentDeviceTypeVar;
+    ODBG() << "Correcting to current type " << DeviceType;
+  }
+  if (DeviceType == acc_device_default) {
+    ODBG() << "Corrected to value of AccCurrentDefaultDeviceTypeVar: "
+           << icv::AccCurrentDefaultDeviceTypeVar;
+    DeviceType = icv::AccCurrentDefaultDeviceTypeVar;
+  }
+  const DeviceIdTy DeviceNum = getCurrentDeviceNumVar(DeviceType);
+  ODBG() << "Current device has id " << DeviceNum;
+  checkICVs();
+  return getSingleDeviceTypeMap(DeviceType)[DeviceNum];
+}
+
+/// Returns the PluginManager device id of the current device of the current
+/// device type.
+int DeviceManagerTy::getPMDeviceId() {
+  ODBG() << "Getting current device, type " << icv::AccCurrentDeviceTypeVar;
+  checkICVs();
+  return getPMDeviceId(icv::AccCurrentDeviceTypeVar);
+}
+
+/// Returns the calling thread's current device number for \p DeviceType.
+int DeviceManagerTy::getDeviceId(acc_device_t DeviceType) {
+  checkICVs();
+  return getCurrentDeviceNumVar(DeviceType);
+}
+
+/// Logs the ICVs and asserts that they are consistent: the current device type
+/// is either acc_device_default or a concrete type, and the current device
+/// number of the (resolved) current type is smaller than its device count.
+void DeviceManagerTy::checkICVs() {
+  ODBG() << "acc-current-device-type = " << icv::AccCurrentDeviceTypeVar;
+  for (int DeviceTypeInt = acc_device_concrete_type_begin;
+       DeviceTypeInt < acc_device_concrete_type_end; DeviceTypeInt++) {
+    acc_device_t DeviceType = static_cast<acc_device_t>(DeviceTypeInt);
+    ODBG() << "acc-current-device-num[" << DeviceType
+           << "] = " << getCurrentDeviceNumVar(DeviceType);
+  }
+  ODBG() << "acc-current-device-type = " << icv::AccCurrentDeviceTypeVar;
+  assert(icv::AccCurrentDeviceTypeVar == acc_device_default ||
+         (icv::AccCurrentDeviceTypeVar >= acc_device_concrete_type_begin &&
+          icv::AccCurrentDeviceTypeVar < acc_device_concrete_type_end));
+  acc_device_t DeviceType = icv::AccCurrentDeviceTypeVar;
+  if (DeviceType == acc_device_default) {
+    DeviceType = icv::AccCurrentDefaultDeviceTypeVar;
+    ODBG() << "Corrected to value of AccCurrentDefaultDeviceTypeVar: "
+           << icv::AccCurrentDefaultDeviceTypeVar;
+  }
+  ODBG() << getCurrentDeviceNumVar(DeviceType);
+  assert(getCurrentDeviceNumVar(DeviceType) <
+         static_cast<int64_t>(getSingleDeviceTypeMap(DeviceType).size()));
+}
+
+/// Returns how many devices of \p DeviceType the last refresh found.
+int DeviceManagerTy::getNumDevices(acc_device_t DeviceType) {
+  checkICVs();
+  return getSingleDeviceTypeMap(DeviceType).size();
+}
+
+/// Sets the calling thread's current device number of every concrete device
+/// type to \p DevNum.
+void DeviceManagerTy::setAllDeviceId(int DevNum) {
+  for (auto &CurrDevNum : icv::AccCurrentDeviceNumVar) {
+    CurrDevNum = DevNum;
+  }
+  checkICVs();
+}
+
+/// Sets the calling thread's current device number of \p DeviceType to
+/// \p DevNum.
+void DeviceManagerTy::setDeviceId(acc_device_t DeviceType, int DevNum) {
+  getCurrentDeviceNumVar(DeviceType) = DevNum;
+  checkICVs();
+}
+
+/// Sets the calling thread's current device number of the current device type
+/// to \p DevNum.
+void DeviceManagerTy::setDeviceId(int DevNum) {
+  setDeviceId(icv::AccCurrentDeviceTypeVar, DevNum);
+  checkICVs();
+}
+
+acc_device_t DeviceManagerTy::getDeviceType() {
+ checkICVs();
+ return icv::AccCurrentDeviceTypeVar;
+}
+
+void DeviceManagerTy::setDeviceType(acc_device_t DeviceType) {
+ icv::AccCurrentDeviceTypeVar = DeviceType;
+ checkICVs();
+}
+
+/// Placeholder: device property queries are not implemented yet. All arguments
+/// are ignored; the call reports a fatal error through REPORT_FATAL().
+size_t DeviceManagerTy::getDeviceProperty(int, acc_device_t,
+ acc_device_property_t) {
+ REPORT_FATAL() << "device properties not yet implemented";
+ return 0;
+}
+
+/// Placeholder: string device property queries are not implemented yet. All
+/// arguments are ignored; the call reports a fatal error through
+/// REPORT_FATAL().
+const char *DeviceManagerTy::getDevicePropertyString(int, acc_device_t,
+ acc_device_property_t) {
+ REPORT_FATAL() << "device properties not yet implemented";
+ return "";
+}
+
+llvm::Expected<DeviceTy &> DeviceManagerTy::getDevice(acc_device_t DeviceType) {
+ return PM->getDevice(getPMDeviceId(DeviceType));
+}
+
+llvm::Expected<DeviceTy &> DeviceManagerTy::getDevice() {
+ return PM->getDevice(getPMDeviceId());
+}
diff --git a/offload/libacctarget/DeviceManager.h b/offload/libacctarget/DeviceManager.h
new file mode 100644
index 0000000000000..d825eeb1c2160
--- /dev/null
+++ b/offload/libacctarget/DeviceManager.h
@@ -0,0 +1,75 @@
+//===- DeviceManager.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ACC_OFFLOAD_DEVICE_MANAGER_H_
+#define LLVM_ACC_OFFLOAD_DEVICE_MANAGER_H_
+
+#include "include/openacc.h"
+#include "omptarget.h"
+#include <array>
+#include <cstddef>
+
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+ acc_device_t DeviceType);
+
+namespace llvm::acc::target {
+
+constexpr int AccDeviceNumConcreteTypes =
+ acc_device_concrete_type_end - acc_device_concrete_type_begin;
+constexpr int AccDeviceTypeOffset = acc_device_concrete_type_begin;
+
+// Tracks, per concrete OpenACC device type, the list of PluginManager device
+// ids, and exposes the device-related entry points used by the ACC API
+// implementation.
+class DeviceManagerTy {
+public:
+ // Device id type stored in the per-type maps.
+ using DeviceIdTy = int64_t;
+
+private:
+ // Device ids of one device type.
+ using SingleDeviceTypeMapTy = llvm::SmallVector<DeviceIdTy, 8>;
+ // One SingleDeviceTypeMapTy per concrete device type
+ // (AccDeviceNumConcreteTypes entries).
+ using AllDeviceTypeMap =
+ std::array<SingleDeviceTypeMapTy, AccDeviceNumConcreteTypes>;
+ AllDeviceTypeMap PMDeviceMap;
+
+ // Returns the device id list for DeviceType.
+ SingleDeviceTypeMapTy &getSingleDeviceTypeMap(acc_device_t DeviceType);
+
+public:
+ void init();
+ void deinit();
+
+ // Refreshes the device mapping.
+ // NOTE(review): the original comment was cut off after "according to the";
+ // the input the refresh is based on, and the exact effect of
+ // UpdateDeviceType, should be documented by the author.
+ void refreshDeviceMapping(bool UpdateDeviceType);
+
+ // Entry points for ACC APIs.
+ int getDeviceId(acc_device_t DeviceType);
+ int getNumDevices(acc_device_t DeviceType);
+
+ void setAllDeviceId(int DeviceId);
+ void setDeviceId(acc_device_t DeviceType, int DeviceId);
+ void setDeviceId(int DeviceId);
+
+ acc_device_t getDeviceType();
+ void setDeviceType(acc_device_t DeviceType);
+
+ size_t getDeviceProperty(int DeviceId, acc_device_t DeviceType,
+ acc_device_property_t DeviceProperty);
+ const char *getDevicePropertyString(int DeviceId, acc_device_t DeviceType,
+ acc_device_property_t DeviceProperty);
+
+ // Verification.
+ void checkICVs();
+
+ // Obtaining the device ID for use with PluginManager.
+ int getPMDeviceId(acc_device_t DeviceType);
+ int getPMDeviceId();
+
+ // Device lookup through the PluginManager; the overload without arguments
+ // uses the argument-less getPMDeviceId().
+ llvm::Expected<DeviceTy &> getDevice(acc_device_t DeviceType);
+ llvm::Expected<DeviceTy &> getDevice();
+};
+
+extern DeviceManagerTy *DM;
+} // namespace llvm::acc::target
+
+#endif // LLVM_ACC_OFFLOAD_DEVICE_MANAGER_H_
diff --git a/offload/libacctarget/Interface.cpp b/offload/libacctarget/Interface.cpp
new file mode 100644
index 0000000000000..002813635cc00
--- /dev/null
+++ b/offload/libacctarget/Interface.cpp
@@ -0,0 +1,2043 @@
+//===- Interface.cpp --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Interface.h"
+#include "Debug.h"
+#include "DeviceManager.h"
+#include "Logger.h"
+#include "OpenMP/Mapping.h"
+#include "PluginManager.h"
+#include "Private.h"
+#include "QueueManager.h"
+#include "Shared/APITypes.h"
+#include "Shared/Debug.h"
+#include "Shared/SourceInfo.h"
+#include "device.h"
+#include "omptarget.h"
+#include "openacc.h"
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+#include <memory>
+#include <mutex>
+#include <numeric>
+#include <optional>
+#include <sstream>
+#include <string.h>
+#include <string>
+#include <type_traits>
+#include <variant>
+#include <vector>
+
+using namespace llvm::acc::target;
+using namespace llvm::acc::target::debug;
+
+using llvm::SmallVector;
+
+namespace {
+// int32_t copies of the acc_async_* values from openacc.h.
+constexpr int32_t AccAsyncSync = acc_async_sync;
+constexpr int32_t AccAsyncDefault = acc_async_default;
+constexpr int32_t AccAsyncNoval = acc_async_noval;
+// Initial value of acc-default-async-var (see icv::AccDefaultAsyncVar).
+// NOTE(review): -5 is presumably chosen so it does not collide with the
+// acc_async_* values above - confirm against openacc.h.
+constexpr int32_t AccAsyncDefaultQueue = -5;
+} // namespace
+
+namespace llvm::acc::target::icv {
+// acc-default-async-var
+thread_local int32_t AccDefaultAsyncVar = AccAsyncDefaultQueue;
+} // namespace llvm::acc::target::icv
+
+namespace {
+// TODO hook up to some env var
+bool Pedantic = true;
+
+// Stream helpers that print an expression's spelling followed by its value.
+// The argument is parenthesized wherever it is evaluated so that compound
+// expressions keep their meaning next to `<<`, the cast and `?:`.
+//   STR_AND_VAL      - value printed as is.
+//   STR_AND_VALI     - value printed as int64_t (e.g. for char-sized ints).
+//   OPT_STR_AND_VAL  - optional-like value; "(nil)" when empty.
+//   SOPT_STR_AND_VAL - nullable C string; "(nil)" when null.
+#define STR_AND_VAL(X) #X << " " << (X)
+#define STR_AND_VALI(X) #X << " " << (int64_t)(X)
+#define OPT_STR_AND_VAL(X) #X << " " << ((X) ? std::to_string(*(X)) : "(nil)")
+#define SOPT_STR_AND_VAL(X) #X << " " << ((X) ? (X) : "(nil)")
+
+// Describes how a host array descriptor object itself is mapped.
+struct DescMappingInfoTy {
+ // The size of the descriptor
+ size_t DescriptorSize = 0;
+ // The offset in the host descriptor where the pointer to the raw memory is
+ // stored.
+ size_t RawMemoryPtrOffset = 0;
+};
+
+// Describes one raw host memory region that is to be mapped to the device.
+struct MemMappingInfoTy {
+  // Start of the host memory that is mapped.
+  void *RawMemoryPtr = nullptr;
+  // Base pointer that RawMemoryPtr is measured against (see getBaseDelta()).
+  void *RawMemoryBasePtr = nullptr;
+  // Size of the region; empty when it could not be determined.
+  std::optional<uint64_t> RawMemorySize = std::nullopt;
+  // Optional description of a non-contiguous copy.
+  std::optional<NonContigDescTy> CopyDesc = std::nullopt;
+
+  // Signed distance from RawMemoryBasePtr to RawMemoryPtr.
+  ptrdiff_t getBaseDelta() {
+    const auto Begin = reinterpret_cast<intptr_t>(RawMemoryPtr);
+    const auto Base = reinterpret_cast<intptr_t>(RawMemoryBasePtr);
+    return Begin - Base;
+  }
+
+  // Debug check: a mapping must refer to some host memory.
+  void verify() { assert(RawMemoryPtr); }
+
+  // Prints all fields; CopyDesc is printed only as a present/absent flag.
+  void dump(llvm::raw_ostream &OS) {
+    OS << "MemMappingInfoTy:\n";
+    OS << " " << STR_AND_VAL(RawMemoryPtr);
+    OS << " " << STR_AND_VAL(RawMemoryBasePtr);
+    OS << " " << OPT_STR_AND_VAL(RawMemorySize);
+    OS << " " << !!CopyDesc;
+    OS << "\n";
+  }
+};
+
+// One dimension of an array section as used by ArrayInfo.
+struct AccArrayDim {
+ // Start of the section within the dimension, counted in strides.
+ long Offset;
+ // Stride in units of ArrayInfo::ElementSize; may be negative until
+ // ArrayInfo::normalizeStrides() has run.
+ long Stride;
+ // Number of elements in the section; -1 is treated as "unknown" by
+ // ArrayInfo::getSizeInDim().
+ long Size;
+ // Number of elements of the whole dimension; -1 is treated as "unknown" by
+ // ArrayInfo::getSizeInDim().
+ long Extent;
+};
+
+struct ArrayInfo {
+ std::vector<AccArrayDim> Dims;
+ // The size of the raw memory allocation.
+ std::optional<uint64_t> RawMemorySize = {};
+ // The address of the host memory to be copied.
+ void *RawMemoryAddr = nullptr;
+ // Size of the array element.
+ int64_t ElementSize = 0;
+
+ void setPtr(void *Ptr) { RawMemoryAddr = Ptr; }
+
+  // Returns the number of bytes covered by dimension I, computed as
+  // count * stride * ElementSize, or std::nullopt when neither the size nor
+  // the extent of the dimension is known (both are -1). A negative stride is
+  // reported as a fatal error.
+  std::optional<size_t> getSizeInDim(ident_t *Loc, unsigned I) {
+    const AccArrayDim &Dim = Dims[I];
+    if (Dim.Stride < 0) {
+      REPORT_FATAL() << Loc << "Unsupported negative stride";
+    }
+
+    // The `size` is preferred over the `extent`. Consider:
+    //
+    // real a0(100)
+    // call acc_copyin(a0(1:99))
+    // !$acc present(a0(1:99))
+    //
+    // `acc_copyin` allocates space for 99 elements because the flang
+    // descriptor only carries the size (99). If the `extent` from the
+    // `acc present` were used afterwards, 100 elements would be required,
+    // which is more than was allocated. Hence the `size` wins and the
+    // `extent` is only a fallback.
+    const long Count = Dim.Size != -1 ? Dim.Size : Dim.Extent;
+    if (Count == -1)
+      return std::nullopt;
+
+    return static_cast<int64_t>(Count) * Dim.Stride * ElementSize;
+  }
+
+ // Fills in RawMemorySize. Without dimensions the object is a single element
+ // of ElementSize bytes. Otherwise the size of the last dimension is used;
+ // it may be empty when that dimension's size is unknown. Debug builds
+ // additionally assert that no earlier dimension is larger than the last one.
+ void computeSizeFromDims(ident_t *Loc) {
+ if (Dims.size() == 0) {
+ RawMemorySize = ElementSize;
+ return;
+ }
+
+ std::optional<size_t> LargestSize = getSizeInDim(Loc, Dims.size() - 1);
+ RawMemorySize = LargestSize;
+ ODBG(ADT_Descriptor) << "Computed " << OPT_STR_AND_VAL(RawMemorySize);
+
+#ifndef NDEBUG
+ if (!LargestSize) {
+ return;
+ }
+ // Dims is non-empty here, so Dims.size() - 1 cannot wrap around.
+ for (unsigned I = 0; I < Dims.size() - 1; I++) {
+ auto Size = getSizeInDim(Loc, I);
+ assert(!Size || *Size <= *LargestSize);
+ }
+#endif
+ }
+
+ // Makes all strides non-negative and all offsets zero, adjusting
+ // RawMemoryAddr accordingly. Strides must be normalized first because
+ // normalizeOffsets() asserts hasNormalizedStrides().
+ void normalize() {
+ normalizeStrides();
+ normalizeOffsets();
+ }
+
+  // True when no dimension has a negative stride.
+  bool hasNormalizedStrides() {
+    return llvm::none_of(
+        Dims, [](const AccArrayDim &Dim) { return Dim.Stride < 0; });
+  }
+
+ // Flips every negative stride to its positive counterpart, mirrors the
+ // offset of that dimension, and moves RawMemoryAddr down by the
+ // accumulated byte distance. Does nothing when all strides are already
+ // non-negative.
+ void normalizeStrides() {
+ FUNC_LOGGER();
+ ODBG_IF([&]() { dump(llvm::dbgs()); });
+
+ if (hasNormalizedStrides()) {
+ ODBG(ADT_Descriptor) << "No normalization needed.";
+ return;
+ }
+ ODBG(ADT_Descriptor) << "Descriptor needs normalization.";
+
+ // The runtime cannot map negative stride arrays. So we must find the base
+ // address of the host pointer and then invert the descriptor so that the
+ // strides in all dimensions are positive. The base pointer delta will be
+ // used to attach the adjusted device pointer to the array descriptor - that
+ // is, the F18 descriptor will contain the end address of the array because
+ // that is what the compiler assumes.
+ int64_t baseHostPtrDeltaInBytes = 0;
+ for (std::size_t i = 0; i < Dims.size(); i++) {
+ if (Dims[i].Stride < 0) {
+ Dims[i].Stride = -Dims[i].Stride;
+ Dims[i].Offset = Dims[i].Extent - Dims[i].Size - Dims[i].Offset;
+
+ // For each negative stride, skip to previously accounted array.
+ baseHostPtrDeltaInBytes += Dims[i].Stride * (Dims[i].Extent - 1);
+ }
+ }
+
+ // The delta was accumulated in elements; convert it to bytes.
+ baseHostPtrDeltaInBytes *= ElementSize;
+
+ RawMemoryAddr =
+ reinterpret_cast<char *>(RawMemoryAddr) - baseHostPtrDeltaInBytes;
+
+ ODBG(ADT_Descriptor) << "Normalized:";
+ ODBG_IF([&]() { dump(llvm::dbgs()); });
+ }
+
+  // True when every dimension starts at offset zero.
+  bool hasNormalizedOffsets() {
+    return llvm::all_of(
+        Dims, [](const AccArrayDim &Dim) { return Dim.Offset == 0; });
+  }
+
+ // Folds every non-zero dimension offset into RawMemoryAddr (moving it up by
+ // offset * stride * ElementSize bytes) and resets the offsets to zero.
+ // Requires non-negative strides (asserted). Does nothing when all offsets
+ // are already zero.
+ void normalizeOffsets() {
+ FUNC_LOGGER();
+ ODBG_IF([&]() { dump(llvm::dbgs()); });
+ assert(hasNormalizedStrides());
+
+ if (hasNormalizedOffsets()) {
+ ODBG(ADT_Descriptor) << "No normalization needed.";
+ return;
+ }
+ ODBG(ADT_Descriptor) << "Descriptor needs normalization.";
+
+ int64_t baseHostPtrDeltaInBytes = 0;
+ for (auto &Dim : Dims) {
+ if (Dim.Offset != 0) {
+ baseHostPtrDeltaInBytes += Dim.Offset * Dim.Stride;
+ Dim.Offset = 0;
+ }
+ }
+
+ // The delta was accumulated in elements; convert it to bytes.
+ baseHostPtrDeltaInBytes *= ElementSize;
+
+ RawMemoryAddr =
+ reinterpret_cast<char *>(RawMemoryAddr) + baseHostPtrDeltaInBytes;
+
+ ODBG(ADT_Descriptor) << "Normalized:";
+ ODBG_IF([&]() { dump(llvm::dbgs()); });
+ }
+
+  // Debug-only sanity checks: the element size must be positive and the
+  // dimensions must be ordered by strictly increasing stride.
+  void verify() {
+    assert(ElementSize > 0);
+    // Compare neighbouring dimensions. The bound is written as
+    // `I + 1 < Dims.size()` because `Dims.size() - 1` is unsigned and wraps
+    // around for an empty Dims, which would index out of bounds.
+    for (size_t I = 0; I + 1 < Dims.size(); I++) {
+      assert(Dims[I].Stride < Dims[I + 1].Stride &&
+             "Expected dimensions to be sorted");
+    }
+  }
+
+  // Prints one line per dimension followed by one line with the raw memory
+  // address, the raw memory size and the element size.
+  void dump(llvm::raw_ostream &OS) {
+    OS << "ArrayInfo:\n";
+    for (unsigned I = 0; I < Dims.size(); I++) {
+      OS << " Dim " << I;
+      OS << "\t" << STR_AND_VAL(Dims[I].Offset);
+      OS << "\t" << STR_AND_VAL(Dims[I].Size);
+      OS << "\t" << STR_AND_VAL(Dims[I].Stride);
+      OS << "\t" << STR_AND_VAL(Dims[I].Extent);
+      OS << "\n";
+    }
+    OS << " ";
+    OS << " " << STR_AND_VAL(RawMemoryAddr);
+    OS << " " << OPT_STR_AND_VAL(RawMemorySize);
+    OS << " " << STR_AND_VAL(ElementSize);
+    OS << "\n";
+  }
+
+  /// Builds a non-contiguous copy descriptor from the dimensions, visiting
+  /// them from the last to the first, and appends a final entry that covers
+  /// the bytes of a single element. Returns std::nullopt when a dimension
+  /// has no known size.
+  ///
+  /// See the llvm-project/offload/test/offloading/non_contiguous_update.cpp
+  /// test for examples.
+  std::optional<NonContigDescTy> generateNonContigCopyDesc(ident_t *Loc) {
+    NonContigDescTy Result;
+    Result.Dims.reserve(Dims.size() + 1);
+
+    for (const AccArrayDim &Dim : llvm::reverse(Dims)) {
+      if (Dim.Size < 0) {
+        ODBG(ADT_Descriptor)
+            << "Dim size missing, cannot build copy descriptor";
+        return std::nullopt;
+      }
+      Result.Dims.push_back({});
+      auto &Out = Result.Dims.back();
+      Out.Count = Dim.Size;
+      // Strides and offsets of the copy descriptor are expressed in bytes.
+      Out.Stride = Dim.Stride * ElementSize;
+      Out.Offset = Dim.Offset * Out.Stride;
+    }
+
+    // Trailing entry: ElementSize units with stride 1.
+    Result.Dims.push_back({});
+    auto &ElementBytes = Result.Dims.back();
+    ElementBytes.Count = ElementSize;
+    ElementBytes.Offset = 0;
+    ElementBytes.Stride = 1;
+
+    return Result;
+  }
+};
+
+// Flattened view of a memref-style descriptor.
+// NOTE(review): field names follow the MLIR memref descriptor layout
+// (allocated/aligned pointer, offset, sizes, strides) - presumably this
+// mirrors it; confirm against the producer of these descriptors.
+struct MaterializedMemRefDesc {
+ void *allocatedPtr;
+ void *alignedPtr;
+ uint64_t offset;
+ // Size of one element; multiplied into the total extent in
+ // ArgDescriptorsTy::getMappingInfos().
+ int64_t elementSize;
+ // Number of entries in `sizes` and `strides`.
+ unsigned char rank;
+ const uint64_t *sizes;
+ const uint64_t *strides;
+};
+
+// Prints the scalar fields of Desc on one line, then one line per dimension
+// with its size and stride.
+void dump(const MaterializedMemRefDesc &Desc, llvm::raw_ostream &OS) {
+  OS << " " << STR_AND_VAL(Desc.allocatedPtr);
+  OS << " " << STR_AND_VAL(Desc.alignedPtr);
+  OS << " " << STR_AND_VAL(Desc.offset);
+  OS << " " << STR_AND_VAL(Desc.elementSize);
+  OS << " " << STR_AND_VALI(Desc.rank);
+  OS << "\n";
+
+  // The loop variable must stay `I`: the macros print the expression text.
+  for (unsigned I = 0; I < Desc.rank; I++) {
+    OS << "Dim " << I;
+    OS << " " << STR_AND_VAL(Desc.sizes[I]);
+    OS << " " << STR_AND_VAL(Desc.strides[I]);
+    OS << "\n";
+  }
+}
+
+// Prints version, rank and element size of Desc on one line, then one line
+// per dimension with its bounds, extent, byte stride and start index.
+void dump(const AccDataDescOpenACC &Desc, llvm::raw_ostream &OS) {
+  OS << " " << STR_AND_VAL(Desc.Base.Version);
+  OS << " " << STR_AND_VALI(Desc.Rank);
+  OS << " " << STR_AND_VAL(Desc.ElementSize);
+  OS << "\n";
+
+  // The loop variable must stay `I`: the macros print the expression text.
+  for (unsigned I = 0; I < Desc.Rank; I++) {
+    OS << "Dim " << I;
+    OS << " " << STR_AND_VAL(Desc.LowerBounds[I]);
+    OS << " " << STR_AND_VAL(Desc.UpperBounds[I]);
+    OS << " " << STR_AND_VAL(Desc.Extents[I]);
+    OS << " " << STR_AND_VAL(Desc.StridesInBytes[I]);
+    OS << " " << STR_AND_VAL(Desc.StartIndices[I]);
+    OS << "\n";
+  }
+}
+
+// Combines several callables into one object whose operator() is the
+// overload set of all of them. Used with std::visit to dispatch on the
+// alternatives of a std::variant.
+template <class... Ts> struct overloads : Ts... {
+ using Ts::operator()...;
+};
+// Deduction guide so that `overloads{A, B, C}` deduces the callable types.
+template <class... Ts> overloads(Ts...) -> overloads<Ts...>;
+
+// Renders an async value for logging: non-negative values name a stream,
+// the acc_async_* special values get their own names, and anything else is
+// "UNKNOWN".
+std::string asyncToString(int64_t Async) {
+  if (Async >= 0)
+    return "STREAM(" + std::to_string(Async) + ")";
+  if (Async == AccAsyncSync)
+    return "SYNC";
+  if (Async == AccAsyncDefault)
+    return "DEFAULT";
+  if (Async == AccAsyncNoval)
+    return "NOVAL";
+  return "UNKNOWN";
+}
+
+// Renders the TGT_ACC_MAPTYPE_* bits set in Type as a space-separated list
+// for logging, or "(none)" when no known bit is set.
+std::string mapTypeToString(int64_t Type) {
+  std::string Str;
+  // Appends "<Name> " when the corresponding bit is set.
+  auto AppendIf = [&Str](bool IsSet, const char *Name) {
+    if (!IsSet)
+      return;
+    Str += Name;
+    Str += ' ';
+  };
+
+  AppendIf(Type & TGT_ACC_MAPTYPE_TO, "TO");
+  AppendIf(Type & TGT_ACC_MAPTYPE_FROM, "FROM");
+  AppendIf(Type & TGT_ACC_MAPTYPE_FINALIZE, "DELETE");
+  AppendIf(Type & TGT_ACC_MAPTYPE_PTR_AND_OBJ, "PTR_AND_OBJ");
+  AppendIf(Type & TGT_ACC_MAPTYPE_PRIVATE, "PRIVATE");
+  AppendIf(Type & TGT_ACC_MAPTYPE_LITERAL, "LITERAL");
+  AppendIf(Type & TGT_ACC_MAPTYPE_DEVPTR, "DEVPTR");
+  AppendIf(Type & TGT_ACC_MAPTYPE_MANAGED_DEVPTR, "MANAGED_DEVPTR");
+  AppendIf(Type & TGT_ACC_MAPTYPE_NO_CREATE, "NO_CREATE");
+  AppendIf(Type & TGT_ACC_MAPTYPE_GANG_PRIVATE, "GANG_PRIVATE");
+  AppendIf(Type & TGT_ACC_MAPTYPE_WORKER_PRIVATE, "WORKER_PRIVATE");
+  AppendIf(Type & TGT_ACC_MAPTYPE_VECTOR_PRIVATE, "VECTOR_PRIVATE");
+  AppendIf(Type & TGT_ACC_MAPTYPE_INIT_ZERO, "INIT_ZERO");
+  AppendIf(Type & TGT_ACC_MAPTYPE_DEVICE_RESIDENT, "DEVICE_RESIDENT");
+  AppendIf(Type & TGT_ACC_MAPTYPE_IF_PRESENT, "IF_PRESENT");
+
+  if (Str.empty())
+    return "(none)";
+
+  // Remove trailing space.
+  Str.pop_back();
+  return Str;
+}
+
+// When data is copied back to the host at the end of a mapping: on every
+// data end, only when the mapping is deleted, or never.
+enum class AccCopyOutType { Always, OnDelete, Never };
+// Which reference count a mapping operation uses. `Structured` is passed on
+// as the "hold" modifier to the mapping layer; `Dynamic` is not.
+enum class AccRefCountingType { Dynamic, Structured };
+
+// State handed from handleSingleDataEnd() to the deferred post-processing
+// function accPostProcessingTargetDataEnd().
+struct PostProcessingInfo {
+ /// The target pointer information.
+ TargetPointerResultTy TPR;
+ // Size passed on to eraseMapEntry()/deallocTgtPtrAndEntry().
+ int64_t DataSize;
+ // Whether host shadow pointers are restored to their original content.
+ bool ShouldRestoreShadow;
+ // Whether the mapping was selected for deletion when it was queued.
+ bool ShouldDelete;
+};
+
+// Collects the kernel arguments and the temporary device allocations that
+// are produced while the data of a kernel launch is being mapped.
+struct KernelArgsMappingInfoTy {
+  AccKernelArgsTy &KernelArgs;
+
+  // Device allocations that only exist for the launch.
+  void addLaunchAlloc(void *Alloc) { LaunchAllocs.push_back(Alloc); }
+  SmallVector<void *> LaunchAllocs;
+
+  // Kernel argument values (Args) and, once getLaunchArgs() has run, the
+  // address of each of them (Ptrs).
+  void addArg(void *Arg) { Args.push_back(Arg); }
+  SmallVector<void *> Args;
+  SmallVector<void *> Ptrs;
+
+  // Builds the launch parameters from the collected arguments. May only be
+  // called while Ptrs is still empty. The result points into Args and Ptrs.
+  KernelLaunchParamsTy getLaunchArgs() {
+    assert(Ptrs.empty());
+
+    if (Args.empty())
+      return KernelLaunchParamsTy{};
+
+    const unsigned NumArgs = Args.size();
+    Ptrs.resize(NumArgs);
+    for (uint32_t Idx = 0; Idx != NumArgs; ++Idx)
+      Ptrs[Idx] = &Args[Idx];
+
+    return KernelLaunchParamsTy{sizeof(void *) * NumArgs, Args.data(),
+                                Ptrs.data()};
+  }
+};
+
+/// Deferred part of ending a device mapping: optionally restores the host
+/// shadow pointers and, if this thread is still responsible for it, removes
+/// the mapping entry and frees the device memory.
+///
+/// Takes ownership of \p Info, which is deleted before returning. Returns
+/// OFFLOAD_SUCCESS; a failed deallocation is reported as a fatal error.
+[[nodiscard]] int accPostProcessingTargetDataEnd(DeviceTy *Device,
+                                                 PostProcessingInfo *Info) {
+  // This will make sure we delete it when we exit the function.
+  std::unique_ptr<PostProcessingInfo> InfoDeleter(Info);
+
+  assert(!Info->TPR.isHostPointer());
+
+  MappingInfoTy::HDTTMapAccessorTy HDTTMap =
+      Device->getMappingInfo().HostDataToTargetMap.getExclusiveAccessor();
+
+  // We cannot use a lock guard because we may end up deleting the mutex.
+  // We also explicitly unlocked the entry after it was put in the
+  // EntriesInfo so it can be reused.
+  Info->TPR.getEntry()->lock();
+  auto *Entry = Info->TPR.getEntry();
+
+  // TODO I do not understand why this is necessary - does the mapping
+  // automatically queue up entry deletion?
+  bool DelEntry = Info->ShouldDelete;
+  const bool IsNotLastUser = Entry->decDataEndThreadCount() != 0;
+  if (DelEntry && (Entry->getTotalRefCount() != 0 || IsNotLastUser)) {
+    ODBG(ADT_Mapping) << "IsNotLastUser";
+    // The thread is not in charge of deletion anymore. Give up access
+    // to the HDTT map and unset the deletion flag.
+    HDTTMap.destroy();
+    DelEntry = false;
+  }
+
+  if (Info->ShouldRestoreShadow) {
+    Entry->foreachShadowPointerInfo([&](const ShadowPtrInfoTy &ShadowPtr) {
+      ODBG(ADT_Mapping) << "Restoring host shadow "
+                        << (void *)ShadowPtr.HstPtrAddr
+                        << " to its original content (" << ShadowPtr.PtrSize
+                        << " bytes)";
+      std::memcpy(ShadowPtr.HstPtrAddr, ShadowPtr.HstPtrContent.data(),
+                  ShadowPtr.PtrSize);
+      return OFFLOAD_SUCCESS;
+    });
+  }
+
+  // Give up the lock as we either don't need it anymore (e.g., done with
+  // TPR), or erase TPR.
+  Info->TPR.setEntry(nullptr);
+
+  // The decision must be based on DelEntry, not on Info->ShouldDelete: when
+  // the entry is still referenced or another thread is still ending it, the
+  // flag was cleared above and the map accessor was already given up, so
+  // erasing here would touch the map without holding it and could free an
+  // entry that is still in use.
+  if (!DelEntry)
+    return OFFLOAD_SUCCESS;
+
+  int Ret =
+      Device->getMappingInfo().eraseMapEntry(HDTTMap, Entry, Info->DataSize);
+  // Entry is already removed from the map, we can unlock it now.
+  HDTTMap.destroy();
+  Ret |= Device->getMappingInfo().deallocTgtPtrAndEntry(Entry, Info->DataSize);
+  if (Ret != OFFLOAD_SUCCESS)
+    REPORT_FATAL() << "Deallocating data from device failed.";
+
+  return OFFLOAD_SUCCESS;
+}
+
+/// Ends the device mapping of one host object: looks up the mapping,
+/// optionally copies the data back to the host, and queues the deferred
+/// clean-up (shadow pointer restore, entry deletion) on \p AsyncInfo.
+///
+/// \tparam SizeTy either `int64_t` (contiguous transfer of \p ArgSize bytes)
+///         or `NonContigDescTy &` (non-contiguous transfer described by
+///         \p ArgSize). Any other type is rejected at compile time.
+/// \param ForceDelete delete the mapping regardless of its reference count.
+/// \param IsNoCreate a missing mapping is tolerated instead of being fatal.
+/// \param CopyType when the data is copied back to the host.
+/// \param MapType which reference count is decremented.
+template <typename SizeTy>
+void handleSingleDataEnd(ident_t *Loc, void *ArgBasePtr, void *ArgPtr,
+                         SizeTy ArgSize, bool ForceDelete, bool IsNoCreate,
+                         AccCopyOutType CopyType, AccRefCountingType MapType,
+                         AsyncInfoTy &AsyncInfo, DeviceTy &Device) {
+  constexpr bool IsContiguous = std::is_same<SizeTy, int64_t>::value;
+  constexpr bool IsNonContiguous =
+      std::is_same<SizeTy, NonContigDescTy &>::value;
+  // The condition depends on SizeTy, so it is only evaluated when the
+  // template is instantiated. A plain `static_assert(false)` in a discarded
+  // `if constexpr` branch is rejected by compilers that predate CWG2518.
+  static_assert(IsContiguous || IsNonContiguous,
+                "SizeTy must be int64_t or NonContigDescTy &");
+
+  int64_t DataSize;
+  if constexpr (IsContiguous)
+    DataSize = ArgSize;
+  else
+    DataSize = ArgSize.getAllocSize();
+
+  FUNC_LOGGER();
+  TargetPointerResultTy TPR = Device.getMappingInfo().getTgtPtrBegin(
+      ArgPtr, DataSize, /*UpdateRefCount=*/true,
+      /*HasHoldModifier=*/MapType == AccRefCountingType::Structured, IsNoCreate,
+      ForceDelete,
+      /*FromDataEnd=*/true);
+  if (!TPR.isPresent()) {
+    ODBG(ADT_Mapping) << "Mapping does not exist: "
+                      << (IsNoCreate ? "is no_create" : "error");
+    if (Pedantic && !IsNoCreate)
+      REPORT_FATAL() << "Device mapping does not exist at " << Loc;
+    return;
+  }
+
+  void *HstPtrBegin = ArgPtr;
+  void *TgtPtrBegin = TPR.TargetPointer;
+  ODBG(ADT_Mapping) << "There are " << DataSize
+                    << " bytes allocated at target address " << TgtPtrBegin
+                    << " - is" << (TPR.Flags.IsLast ? "" : " not") << " last";
+
+  bool ShouldDelete = ForceDelete || TPR.Flags.IsLast;
+  bool ShouldCopyOut = CopyType == AccCopyOutType::Always ||
+                       (CopyType == AccCopyOutType::OnDelete && ShouldDelete);
+  if (ShouldCopyOut) {
+    ODBG(ADT_Mapping) << "Moving " << DataSize << " bytes (tgt:" << TgtPtrBegin
+                      << ") -> (hst:" << HstPtrBegin << ")";
+    // Exactly one of the two branches is compiled, so Ret is always set.
+    int Ret;
+    if constexpr (IsContiguous)
+      Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize, AsyncInfo,
+                                TPR.getEntry());
+    else
+      Ret = Device.retrieveNonContigData(HstPtrBegin, TgtPtrBegin, ArgSize,
+                                         AsyncInfo, TPR.getEntry());
+    if (Ret != OFFLOAD_SUCCESS)
+      REPORT_FATAL() << "Failed to transfer data from device at " << Loc;
+  }
+
+  ODBG(ADT_Mapping) << "Queueing up post processing";
+
+  // TODO We may want to have a more intricate system for queueing up post
+  // processing. In OpenACC, we could potentially queue up a lot of stream
+  // operations before syncing, and we only execute these post processing
+  // functions after we sync. This would leave a lot of deallocation and
+  // unmapping queued for post processing but never happening because the
+  // operations in the stream are continuing to execute while we add more post
+  // processing functions which we never execute.
+  //
+  // One option here is to use Device.enqueueHostCall, however, for example, for
+  // CUDA, having cuFree's etc in a function executed in the stream is not
+  // supported (the context in that thread is invalid for calling cuda
+  // functions). Instead, we can have the host call "notify" that we can execute
+  // specific post processing functions, and we execute them at some point
+  // during execution on the normal threads.
+  //
+  // Shadow pointers are restored exactly when data was copied out, hence
+  // ShouldCopyOut is stored as PostProcessingInfo::ShouldRestoreShadow.
+  auto *PostProcessingPtr = new PostProcessingInfo{std::move(TPR), DataSize,
+                                                   ShouldCopyOut, ShouldDelete};
+  // Unlock the entry so it can be reused until the post processing runs; the
+  // post-processing function locks it again.
+  PostProcessingPtr->TPR.getEntry()->unlock();
+  AsyncInfo.addPostProcessingFunction([=, Device = &Device]() -> int {
+    return accPostProcessingTargetDataEnd(Device, PostProcessingPtr);
+  });
+}
+
+// Mapping information for an argument that is passed through a descriptor:
+// the descriptor itself plus, if there is any, the memory it points to.
+struct DescAndMemMappingInfoTy {
+ DescMappingInfoTy Desc;
+ // Empty when the descriptor has no memory to map (e.g. not allocated).
+ std::optional<MemMappingInfoTy> Memory;
+};
+
+struct ArgDescriptorsTy {
+
+ const Fortran::runtime::Descriptor *Flang = nullptr;
+ std::optional<MaterializedMemRefDesc> MemRef = std::nullopt;
+ const AccDataDescOpenACC *Acc = nullptr;
+
+ // True when none of the three descriptor kinds is set.
+ bool isNone() { return !Flang && !MemRef && !Acc; }
+
+ // Debug checks: a Flang descriptor and a MemRef descriptor are mutually
+ // exclusive, and at least one descriptor must be set.
+ void verify() {
+ assert(!(Flang && MemRef));
+ assert(!isNone());
+ }
+
+  // Prints each of the three descriptors, or "(nil)" for the ones that are
+  // not set. The Flang descriptor is dumped to stderr rather than to OS.
+  void dump(llvm::raw_ostream &OS) {
+    OS << "ArgDescriptorsTy:\n";
+
+    OS << "Flang:\n";
+    if (!Flang) {
+      OS << "(nil)\n";
+    } else {
+      // TODO can we use OS somehow?
+      Flang->Dump(stderr);
+    }
+
+    OS << "MemRef:\n";
+    if (!MemRef)
+      OS << "(nil)\n";
+    else
+      ::dump(*MemRef, OS);
+
+    OS << "Acc:\n";
+    if (!Acc)
+      OS << "(nil)\n";
+    else
+      ::dump(*Acc, OS);
+  }
+
+ // Appends one AccArrayDim per dimension of the OpenACC descriptor to
+ // AI.Dims. AI.ElementSize must already be positive. The byte strides of
+ // the descriptor are converted to strides in elements; only for the first
+ // dimension may AI.ElementSize be reduced to make that division exact.
+ void collectAccBounds(ident_t *Loc, ArrayInfo &AI) {
+ assert(Acc);
+ if (AI.ElementSize <= 0) {
+ REPORT_FATAL() << Loc << "Invalid element size";
+ }
+
+ AI.Dims.reserve(Acc->Rank);
+ for (std::size_t I = 0; I < Acc->Rank; I++) {
+ AI.Dims.push_back({});
+ auto &ThisDim = AI.Dims.back();
+ // Factor by which the element count grows when ElementSize is reduced.
+ long SizeFactor = 1;
+ if (Acc->StridesInBytes[I] % AI.ElementSize != 0) {
+ if (I == 0) {
+ // `stride` in AccArrayDim is meant to be multiplied by elementsize.
+ // But the stride of a sliced descriptor array might not be divisible
+ // by the current element size. So, reduce elementsize.
+ SizeFactor =
+ AI.ElementSize / std::gcd(AI.ElementSize, Acc->StridesInBytes[I]);
+ AI.ElementSize /= SizeFactor;
+ } else {
+ REPORT_FATAL() << Loc << "Invalid array stride";
+ }
+ }
+ ThisDim.Offset = Acc->LowerBounds[I];
+ ThisDim.Stride = Acc->StridesInBytes[I] / AI.ElementSize;
+ // Bounds are inclusive, hence the +1.
+ ThisDim.Size =
+ SizeFactor * (Acc->UpperBounds[I] - Acc->LowerBounds[I] + 1);
+ ThisDim.Extent = Acc->Extents[I];
+ }
+ }
+
+ // Appends one AccArrayDim per dimension of the Flang descriptor to
+ // AI.Dims. AI.ElementSize is checked on entry and then overwritten with the
+ // descriptor's element size. Offsets are always zero. Extent is not
+ // assigned here and keeps the value from `push_back({})`.
+ void collectFlangBounds(ident_t *Loc, ArrayInfo &AI) {
+ assert(Flang);
+ if (AI.ElementSize <= 0) {
+ REPORT_FATAL() << Loc << "Invalid element size";
+ }
+
+ AI.Dims.reserve(Flang->rank());
+ AI.ElementSize = Flang->ElementBytes();
+ for (int I = 0; I < Flang->rank(); I++) {
+ AI.Dims.push_back({});
+ auto &ThisDim = AI.Dims.back();
+ auto &FlangDim = Flang->GetDimension(I);
+ // Factor by which the element count grows when ElementSize is reduced.
+ long SizeFactor = 1;
+ if (FlangDim.ByteStride() % AI.ElementSize != 0) {
+ if (I == 0) {
+ // Same element-size reduction as in collectAccBounds(): only the first
+ // dimension may have a byte stride that is not a multiple of the
+ // element size.
+ SizeFactor =
+ AI.ElementSize / std::gcd(AI.ElementSize, FlangDim.ByteStride());
+ AI.ElementSize /= SizeFactor;
+ } else {
+ REPORT_FATAL() << Loc << "Invalid array stride";
+ }
+ }
+ ThisDim.Offset = 0;
+ ThisDim.Stride = FlangDim.ByteStride() / AI.ElementSize;
+ ThisDim.Size = SizeFactor * FlangDim.Extent();
+ }
+ }
+
+ using LiteralArg = void *;
+ using ArgMappingInfoTy =
+ std::variant<DescAndMemMappingInfoTy, MemMappingInfoTy, LiteralArg>;
+ using ArgMappingInfosTy = std::vector<ArgMappingInfoTy>;
+
+ // Translates the descriptors into the list of items that have to be mapped
+ // or passed for this argument:
+ //  - Flang descriptor: one DescAndMemMappingInfoTy (descriptor plus, when
+ //    allocated, the described memory; OpenACC bounds are used if present).
+ //  - MemRef descriptor: the allocated and aligned pointers as memory
+ //    mappings followed by offset, sizes and strides as literals.
+ //  - OpenACC descriptor only: one memory mapping based on Ptr.
+ // Ptr is only used in the last case. Any other combination is fatal.
+ ArgMappingInfosTy getMappingInfos(ident_t *Loc, void *Ptr) {
+ if (Flang) {
+ auto DMI = DescMappingInfoTy{Flang->SizeInBytes(),
+ offsetof(CFI_cdesc_t, base_addr)};
+ if (!Flang->IsAllocated()) {
+ ODBG() << "Is not allocated - nothing to map.";
+ ArgMappingInfosTy MIs;
+ MIs.emplace_back(DescAndMemMappingInfoTy{DMI, std::nullopt});
+ return MIs;
+ }
+
+ ArrayInfo AI;
+ AI.ElementSize = Flang->ElementBytes();
+ // Fall back to the OpenACC element size when the Flang descriptor reports
+ // zero.
+ if (AI.ElementSize == 0 && Acc) {
+ AI.ElementSize = Acc->ElementSize;
+ }
+ AI.setPtr(Flang->OffsetElement(0));
+
+ if (Acc) {
+ collectAccBounds(Loc, AI);
+ } else {
+ collectFlangBounds(Loc, AI);
+ }
+ AI.normalize();
+ AI.computeSizeFromDims(Loc);
+
+ auto MMI = MemMappingInfoTy{};
+ MMI.RawMemoryPtr = AI.RawMemoryAddr;
+ MMI.RawMemoryBasePtr = Flang->OffsetElement(0);
+ MMI.RawMemorySize = AI.RawMemorySize;
+ MMI.CopyDesc = AI.generateNonContigCopyDesc(Loc);
+
+ ArgMappingInfosTy MIs;
+ MIs.emplace_back(DescAndMemMappingInfoTy{DMI, std::move(MMI)});
+ return MIs;
+ } else if (MemRef) {
+ if (Acc) {
+ REPORT_FATAL() << Loc << "Unsupported: MemRef with OpenACC bounds";
+ }
+
+ ArgMappingInfosTy MIs;
+
+ // Walk the dimensions from last to first; each stride must equal the
+ // product of the sizes of all later dimensions, otherwise the descriptor
+ // is rejected.
+ uint64_t Extent = 1LL;
+ for (ssize_t I = (ssize_t)MemRef->rank - 1; I >= 0; I--) {
+ Extent *= MemRef->sizes[I];
+ if (Extent != MemRef->strides[I] * MemRef->sizes[I]) {
+ REPORT_FATAL() << Loc << "Invalid memref descriptor";
+ }
+ }
+ Extent *= MemRef->elementSize;
+
+ {
+ auto MMI = MemMappingInfoTy{};
+ MMI.RawMemoryPtr = MemRef->allocatedPtr;
+ MMI.RawMemoryBasePtr = MemRef->allocatedPtr;
+ MMI.RawMemorySize = Extent;
+ MIs.push_back(std::move(MMI));
+ }
+ {
+ auto MMI = MemMappingInfoTy{};
+ MMI.RawMemoryPtr = MemRef->alignedPtr;
+ MMI.RawMemoryBasePtr = MemRef->allocatedPtr;
+ MMI.RawMemorySize = Extent;
+ MIs.push_back(std::move(MMI));
+ }
+
+ // The remaining descriptor fields are passed by value as literals.
+ MIs.push_back(reinterpret_cast<void *>(MemRef->offset));
+
+ for (size_t I = 0; I < MemRef->rank; I++) {
+ MIs.push_back(reinterpret_cast<void *>(MemRef->sizes[I]));
+ MIs.push_back(reinterpret_cast<void *>(MemRef->strides[I]));
+ }
+
+ return MIs;
+ } else if (Acc) {
+ ArrayInfo AI;
+ AI.ElementSize = Acc->ElementSize;
+ AI.setPtr(Ptr);
+ collectAccBounds(Loc, AI);
+ AI.normalize();
+ AI.computeSizeFromDims(Loc);
+
+ auto MMI = MemMappingInfoTy{};
+ MMI.RawMemoryPtr = AI.RawMemoryAddr;
+ MMI.RawMemoryBasePtr = Ptr;
+ MMI.RawMemorySize = AI.RawMemorySize;
+ MMI.CopyDesc = AI.generateNonContigCopyDesc(Loc);
+
+ ArgMappingInfosTy MIs;
+ MIs.push_back(std::move(MMI));
+ return MIs;
+ } else {
+ REPORT_FATAL() << Loc << "Unknown case.";
+ abort();
+ }
+ }
+
+  /// Creates a device copy of the host data selected by the OpenACC bounds
+  /// and passes it to the kernel as a private argument.
+  ///
+  /// Only the OpenACC descriptor is supported; a Flang descriptor is reported
+  /// as not implemented. The device allocation is registered with \p KI as a
+  /// launch allocation. The kernel argument is the device address moved back
+  /// by the distance between \p ArgPtr and the normalized start of the data,
+  /// so that device code can index it like the host base pointer. An unknown
+  /// size, a failed allocation and a failed transfer are reported as fatal
+  /// errors.
+  void dataBeginPrivate(ident_t *Loc, void *ArgPtr, int64_t ArgSize,
+                        bool HasFlagTo, DeviceTy &Device,
+                        AsyncInfoTy &AsyncInfo,
+                        MappingInfoTy::HDTTMapAccessorTy &HDTTMap,
+                        KernelArgsMappingInfoTy &KI) {
+    assert(!MemRef);
+
+    if (Flang) {
+      REPORT_FATAL() << "TODO Flang descriptor on private variable";
+      return;
+    }
+    if (!Acc) {
+      REPORT_FATAL() << Loc << "Unknown descriptor type for private variable";
+      return;
+    }
+
+    assert(ArgPtr);
+    ArrayInfo AI;
+    AI.ElementSize = Acc->ElementSize;
+    AI.setPtr(ArgPtr);
+    collectAccBounds(Loc, AI);
+    AI.normalize();
+    AI.computeSizeFromDims(Loc);
+
+    // The size is empty when neither size nor extent of the last dimension is
+    // known. Check it explicitly: an assert alone would leave release builds
+    // reading an empty optional.
+    if (!AI.RawMemorySize) {
+      REPORT_FATAL() << "Cannot determine the size of private variable at "
+                     << Loc;
+      return;
+    }
+    const size_t DataSize = *AI.RawMemorySize;
+    void *HostData = AI.RawMemoryAddr;
+    const ptrdiff_t Offset = reinterpret_cast<intptr_t>(HostData) -
+                             reinterpret_cast<intptr_t>(ArgPtr);
+
+    ODBG(ADT_Interface) << "ACC firstprivate (partial): dataSize=" << DataSize
+                        << " hostData=" << HostData << " (base=" << ArgPtr
+                        << ")";
+
+    void *PrivateMemory =
+        Device.allocData(DataSize, nullptr, TARGET_ALLOC_DEVICE);
+    // A null result is only an error for a non-empty request.
+    if (!PrivateMemory && DataSize != 0) {
+      REPORT_FATAL() << "Failed to allocate private variable on device at "
+                     << Loc;
+      return;
+    }
+    // Track the allocation before the transfer so it is not lost on failure.
+    KI.addLaunchAlloc(PrivateMemory);
+
+    if (Device.submitData(PrivateMemory, HostData, DataSize, AsyncInfo,
+                          /*Entry=*/nullptr, &HDTTMap) != OFFLOAD_SUCCESS) {
+      REPORT_FATAL() << "Failed to transfer private variable to device at "
+                     << Loc;
+      return;
+    }
+    KI.addArg(static_cast<char *>(PrivateMemory) - Offset);
+  }
+
+  /// Copies the Flang descriptor itself to a fresh device allocation and
+  /// passes the device copy as a kernel argument. The allocation is
+  /// registered with \p KI as a launch allocation. Requires a Flang
+  /// descriptor and no MemRef descriptor. A failed allocation or transfer is
+  /// reported as a fatal error.
+  void dataBeginDevPtr(ident_t *Loc, DeviceTy &Device, AsyncInfoTy &AsyncInfo,
+                       MappingInfoTy::HDTTMapAccessorTy &HDTTMap,
+                       KernelArgsMappingInfoTy &KI) {
+    assert(Flang && !MemRef);
+    const size_t DescSize = Flang->SizeInBytes();
+
+    void *DevDesc = Device.allocData(DescSize, nullptr, TARGET_ALLOC_DEVICE);
+    if (!DevDesc) {
+      REPORT_FATAL() << "Failed to allocate descriptor on device at " << Loc;
+      return;
+    }
+    // Track the allocation before the transfer so it is not lost on failure.
+    KI.addLaunchAlloc(DevDesc);
+
+    // submitData takes a non-const host pointer; the descriptor is only read.
+    void *HostDesc = const_cast<void *>(static_cast<const void *>(Flang));
+    if (Device.submitData(DevDesc, HostDesc, DescSize, AsyncInfo,
+                          /*Entry=*/nullptr, &HDTTMap) != OFFLOAD_SUCCESS) {
+      REPORT_FATAL() << "Failed to transfer descriptor to device at " << Loc;
+      return;
+    }
+    KI.addArg(DevDesc);
+  }
+
+ void dataBegin(ident_t *Loc, void *ArgPtr, void *DescriptorAddr,
+ void *&ParentAllocation, bool IsPtrAndObj, char *ArgName,
+ bool HasFlagTo, bool IsNoCreate, AccRefCountingType MapType,
+ AsyncInfoTy &AsyncInfo, DeviceTy &Device,
+ MappingInfoTy::HDTTMapAccessorTy &HDTTMap,
+ KernelArgsMappingInfoTy *KI) {
+ auto AddArg = [&](TargetPointerResultTy &TPR, void *TgtArg, void *HstArg) {
+ if (KI) {
+ if (TPR.isPresent()) {
+ KI->addArg(TgtArg);
+ } else {
+ assert(IsNoCreate);
+ KI->addArg(HstArg);
+ }
+ }
+ };
+ auto MapWithDesc = [&](MemMappingInfoTy &MemInfo, void *BasePtr,
+ bool IsParam) -> void * {
+ assert(MemInfo.CopyDesc);
+ ODBG() << "Will use non-contig copy.";
+
+ TargetPointerResultTy TPR = Device.getMappingInfo().getTargetPointer(
+ HDTTMap, MemInfo.RawMemoryPtr, MemInfo.RawMemoryBasePtr, 0,
+ &*MemInfo.CopyDesc, ArgName, HasFlagTo,
+ /*HasFlagAlways=*/false, /*IsImplicit=*/false,
+ /*UpdateRefCount=*/true, /*HasCloseModifier=*/false,
+ /*HasPresentModifier=*/false,
+ /*HasHoldModifier=*/MapType == AccRefCountingType::Structured,
+ IsNoCreate, AsyncInfo, /*OwnedTPR=*/nullptr,
+ /*ReleaseHDTTMap=*/false);
+ if (IsParam)
+ AddArg(TPR,
+ reinterpret_cast<void *>(
+ (reinterpret_cast<intptr_t>(TPR.TargetPointer) -
+ MemInfo.getBaseDelta())),
+ MemInfo.RawMemoryPtr);
+ return TPR.TargetPointer;
+ };
+
+ auto MapInfos = getMappingInfos(Loc, ArgPtr);
+ auto DescAndMemCase = [&](DescAndMemMappingInfoTy &MapInfo) {
+ ODBG() << "Mapping desc and mem";
+ auto &DescInfo = MapInfo.Desc;
+
+ void *DescTgtPtr = nullptr;
+ if (!ParentAllocation)
+ ParentAllocation = DescriptorAddr;
+
+ {
+ // Always copy the descriptor to device. It is needed regardless of the
+ // user-specified TO/FROM, and regardless of whether no_create is on or
+ // not as the no_create can refer to the raw memory in the descriptor.
+ TargetPointerResultTy TPR = Device.getMappingInfo().getTargetPointer(
+ HDTTMap, DescriptorAddr, DescriptorAddr, 0, (int64_t)DescInfo.DescriptorSize,
+ ArgName, /*HasFlagTo=*/true,
+ /*HasFlagAlways=*/false, /*IsImplicit=*/false,
+ /*UpdateRefCount=*/true, /*HasCloseModifier=*/false,
+ /*HasPresentModifier=*/false,
+ /*HasHoldModifier=*/MapType == AccRefCountingType::Structured,
+ /*IsNoCreate=*/false, AsyncInfo, /*OwnedTPR=*/nullptr,
+ /*ReleaseHDTTMap=*/false);
+ DescTgtPtr = TPR.TargetPointer;
+ AddArg(TPR, DescTgtPtr, DescriptorAddr);
+ }
+
+ void *MemTgtPtr = nullptr;
+ if (MapInfo.Memory) {
+ auto &MemInfo = *MapInfo.Memory;
+ void *BasePtr =
+ static_cast<char *>(DescriptorAddr) + DescInfo.RawMemoryPtrOffset;
+ if (MemInfo.RawMemorySize) {
+ if (MemInfo.CopyDesc) {
+ MemTgtPtr = MapWithDesc(MemInfo, BasePtr, false);
+ } else {
+ TargetPointerResultTy TPR =
+ Device.getMappingInfo().getTargetPointer(
+ HDTTMap, MemInfo.RawMemoryPtr, BasePtr, 0,
+ (int64_t)*MemInfo.RawMemorySize, ArgName, HasFlagTo,
+ /*HasFlagAlways=*/false, /*IsImplicit=*/false,
+ /*UpdateRefCount=*/true, /*HasCloseModifier=*/false,
+ /*HasPresentModifier=*/false,
+ /*HasHoldModifier=*/MapType ==
+ AccRefCountingType::Structured,
+ IsNoCreate, AsyncInfo, /*OwnedTPR=*/nullptr,
+ /*ReleaseHDTTMap=*/false);
+ MemTgtPtr = TPR.TargetPointer;
+ }
+ } else {
+ TargetPointerResultTy TPR = Device.getMappingInfo().getTargetPointer(
+ HDTTMap, MemInfo.RawMemoryPtr, BasePtr, 0, (int64_t)*MemInfo.RawMemorySize,
+ ArgName, HasFlagTo,
+ /*HasFlagAlways=*/false, /*IsImplicit=*/false,
+ /*UpdateRefCount=*/true, /*HasCloseModifier=*/false,
+ /*HasPresentModifier=*/true,
+ /*HasHoldModifier=*/MapType == AccRefCountingType::Structured,
+ IsNoCreate, AsyncInfo, /*OwnedTPR=*/nullptr,
+ /*ReleaseHDTTMap=*/false);
+ MemTgtPtr = TPR.TargetPointer;
+ }
+
+ if (MemTgtPtr && DescTgtPtr) {
+ LookupResult DescLR = Device.getMappingInfo().lookupMapping(
+ HDTTMap, DescriptorAddr, DescInfo.DescriptorSize);
+ auto *DescEntry = DescLR.TPR.getEntry();
+ if (DescEntry) {
+ uintptr_t TgtDescBase =
+ DescEntry->TgtPtrBegin +
+ (reinterpret_cast<uintptr_t>(DescriptorAddr) -
+ DescEntry->HstPtrBegin);
+ void **TgtPtrAddr = reinterpret_cast<void **>(
+ TgtDescBase + DescInfo.RawMemoryPtrOffset);
+ void **HstPtrAddr = reinterpret_cast<void **>(
+ reinterpret_cast<uintptr_t>(DescriptorAddr) +
+ DescInfo.RawMemoryPtrOffset);
+
+ void *HstBaseAddr = *HstPtrAddr;
+ void *TgtPteeBase = reinterpret_cast<void *>(
+ reinterpret_cast<uintptr_t>(MemTgtPtr) +
+ (reinterpret_cast<uintptr_t>(HstBaseAddr) -
+ reinterpret_cast<uintptr_t>(MemInfo.RawMemoryPtr)));
+
+ if (DescEntry->addShadowPointer(
+ ShadowPtrInfoTy{HstPtrAddr, TgtPtrAddr, TgtPteeBase,
+ static_cast<int64_t>(sizeof(void *))})) {
+ ODBG() << "DescAndMemCase attach: device field " << TgtPtrAddr
+ << " -> " << TgtPteeBase;
+ void *&Buf = AsyncInfo.getVoidPtrLocation();
+ Buf = TgtPteeBase;
+ Device.submitData(TgtPtrAddr, &Buf, sizeof(void *), AsyncInfo,
+ DescEntry, &HDTTMap);
+ DescEntry->addEventIfNecessary(Device, AsyncInfo);
+ }
+ }
+ }
+ }
+ };
+    // Maps a plain memory region (one without an enclosing descriptor object)
+    // and records the resulting kernel argument through AddArg.
+    //  - Size known and a non-contiguous copy descriptor is attached: the work
+    //    is delegated to MapWithDesc.
+    //  - Size known, contiguous: the range is mapped with that size.
+    //  - Size unknown: a zero-length lookup is issued with the present
+    //    modifier set, mirroring the zero-size release performed by dataEnd.
+    auto MemCase = [&](MemMappingInfoTy &MemInfo) {
+      // The two contiguous paths differ only in the size and in whether the
+      // present modifier is requested.
+      auto MapContiguous = [&](int64_t Size, bool MustBePresent) {
+        TargetPointerResultTy TPR = Device.getMappingInfo().getTargetPointer(
+            HDTTMap, MemInfo.RawMemoryPtr, MemInfo.RawMemoryBasePtr, 0, Size,
+            ArgName, HasFlagTo,
+            /*HasFlagAlways=*/false, /*IsImplicit=*/false,
+            /*UpdateRefCount=*/true, /*HasCloseModifier=*/false,
+            /*HasPresentModifier=*/MustBePresent,
+            /*HasHoldModifier=*/MapType == AccRefCountingType::Structured,
+            IsNoCreate, AsyncInfo, /*OwnedTPR=*/nullptr,
+            /*ReleaseHDTTMap=*/false);
+        AddArg(TPR, TPR.TargetPointer, MemInfo.RawMemoryPtr);
+      };
+
+      if (!MemInfo.RawMemorySize) {
+        // No size is available, so RawMemorySize must not be dereferenced
+        // here; the previous code did so and read an empty value.
+        MapContiguous(/*Size=*/0, /*MustBePresent=*/true);
+        return;
+      }
+      if (MemInfo.CopyDesc) {
+        MapWithDesc(MemInfo, MemInfo.RawMemoryBasePtr, true);
+        return;
+      }
+      MapContiguous((int64_t)*MemInfo.RawMemorySize, /*MustBePresent=*/false);
+    };
+ auto LiteralCase = [&](void *Literal) {
+ if (KI)
+ KI->addArg(Literal);
+ };
+ for (auto &MapInfo : MapInfos) {
+ std::visit(overloads{DescAndMemCase, MemCase, LiteralCase}, MapInfo);
+ }
+ }
+
+  /// Ends the mappings described by this argument's descriptors.
+  ///
+  /// Every mapping info returned by getMappingInfos(Loc, ArgPtr) is visited:
+  ///  - descriptor + memory: the descriptor object itself is ended first
+  ///    (using its DescriptorSize), then the memory it refers to, if any;
+  ///  - memory only: the memory is ended relative to its RawMemoryBasePtr;
+  ///  - literal: nothing to do.
+  /// Memory without a known size is ended with size 0; memory carrying a
+  /// copy descriptor is ended through the non-contiguous overload.
+  void dataEnd(ident_t *Loc, void *ArgPtr, void *DescriptorAddr,
+               void *ParentAllocation, int64_t ArgType, bool ForceDelete,
+               bool IsNoCreate, AccCopyOutType CopyType,
+               AccRefCountingType MapType, AsyncInfoTy &AsyncInfo,
+               DeviceTy &Device) {
+    // Ends one contiguous host range of Size bytes.
+    auto EndContiguous = [&](void *BasePtr, void *HstPtr, int64_t Size) {
+      handleSingleDataEnd<int64_t>(Loc, BasePtr, HstPtr, Size, ForceDelete,
+                                   IsNoCreate, CopyType, MapType, AsyncInfo,
+                                   Device);
+    };
+
+    // Ends the raw memory of MemInfo, picking the contiguous or the
+    // non-contiguous path.
+    auto EndMemory = [&](MemMappingInfoTy &MemInfo, void *BasePtr) {
+      if (!MemInfo.RawMemorySize) {
+        EndContiguous(BasePtr, MemInfo.RawMemoryPtr, 0);
+        return;
+      }
+      if (!MemInfo.CopyDesc) {
+        EndContiguous(BasePtr, MemInfo.RawMemoryPtr, *MemInfo.RawMemorySize);
+        return;
+      }
+      ODBG(ADT_Mapping) << "Will use non-contig copy.";
+
+      handleSingleDataEnd<NonContigDescTy &>(
+          Loc, BasePtr, MemInfo.RawMemoryPtr, *MemInfo.CopyDesc, ForceDelete,
+          IsNoCreate, CopyType, MapType, AsyncInfo, Device);
+    };
+
+    auto OnDescAndMem = [&](DescAndMemMappingInfoTy &MapInfo) {
+      if (!ParentAllocation)
+        ParentAllocation = DescriptorAddr;
+
+      // The descriptor object is released before the data it points to.
+      EndContiguous(DescriptorAddr, DescriptorAddr,
+                    MapInfo.Desc.DescriptorSize);
+
+      if (!MapInfo.Memory)
+        return;
+
+      // Address of the raw-memory pointer field inside the host descriptor.
+      void *PtrFieldAddr = static_cast<char *>(DescriptorAddr) +
+                           MapInfo.Desc.RawMemoryPtrOffset;
+      EndMemory(*MapInfo.Memory, PtrFieldAddr);
+    };
+    auto OnMem = [&](MemMappingInfoTy &MemInfo) {
+      EndMemory(MemInfo, MemInfo.RawMemoryBasePtr);
+    };
+    auto OnLiteral = [&](void * /*Literal*/) {};
+
+    auto MapInfos = getMappingInfos(Loc, ArgPtr);
+    for (auto &MapInfo : MapInfos)
+      std::visit(overloads{OnDescAndMem, OnMem, OnLiteral}, MapInfo);
+  }
+
+  /// Performs the transfers of an `update` for every sized memory region
+  /// described by this argument's descriptors.
+  ///
+  /// TGT_ACC_MAPTYPE_TO copies host -> device, TGT_ACC_MAPTYPE_FROM copies
+  /// device -> host; both may be set. Regions that are not present are
+  /// skipped when TGT_ACC_MAPTYPE_IF_PRESENT is set and reported through
+  /// REPORT_FATAL otherwise. Regions without a known size and literal
+  /// entries are ignored.
+  void dataUpdate(ident_t *Loc, void *ArgPtr, int64_t ArgType,
+                  AsyncInfoTy &AsyncInfo, DeviceTy &Device) {
+    const bool CopyToDevice = ArgType & TGT_ACC_MAPTYPE_TO;
+    const bool CopyToHost = ArgType & TGT_ACC_MAPTYPE_FROM;
+
+    // Looks up an existing mapping that fully contains [HstPtr, HstPtr+Size)
+    // without touching any reference count.
+    auto FindMapping = [&](void *HstPtr,
+                           int64_t Size) -> TargetPointerResultTy {
+      TargetPointerResultTy TPR = Device.getMappingInfo().getTgtPtrBegin(
+          HstPtr, Size, /*UpdateRefCount=*/false, /*UseHoldRefCount=*/false,
+          /*MustContain=*/true);
+      if (TPR.isPresent())
+        return TPR;
+      if (ArgType & TGT_ACC_MAPTYPE_IF_PRESENT) {
+        ODBG(ADT_Interface) << "Not present, if_present - skipping update.";
+      } else {
+        REPORT_FATAL() << "Device mapping does not exist for update at " << Loc;
+      }
+      return TPR;
+    };
+
+    auto UpdateContiguous = [&](void *HstPtr, int64_t Size) {
+      TargetPointerResultTy TPR = FindMapping(HstPtr, Size);
+      if (!TPR.isPresent())
+        return;
+      void *TgtPtr = TPR.TargetPointer;
+      if (CopyToDevice) {
+        ODBG(ADT_Interface) << "Update TO: " << Size << " bytes hst:" << HstPtr
+                            << " -> tgt:" << TgtPtr;
+        Device.submitData(TgtPtr, HstPtr, Size, AsyncInfo, TPR.getEntry());
+      }
+      if (CopyToHost) {
+        ODBG(ADT_Interface) << "Update FROM: " << Size
+                            << " bytes tgt:" << TgtPtr << " -> hst:" << HstPtr;
+        Device.retrieveData(HstPtr, TgtPtr, Size, AsyncInfo, TPR.getEntry());
+      }
+    };
+
+    auto UpdateNonContig = [&](MemMappingInfoTy &MemInfo) {
+      ODBG(ADT_Interface) << "Will use non-contig update.";
+
+      // The lookup covers the allocation size reported by the copy
+      // descriptor.
+      int64_t AllocSize = MemInfo.CopyDesc->getAllocSize();
+
+      TargetPointerResultTy TPR =
+          FindMapping(MemInfo.RawMemoryPtr, AllocSize);
+      if (!TPR.isPresent())
+        return;
+      void *TgtPtr = TPR.TargetPointer;
+      if (CopyToDevice) {
+        ODBG(ADT_Interface)
+            << "Non-contig update TO: hst:" << MemInfo.RawMemoryPtr
+            << " -> tgt:" << TgtPtr;
+        Device.submitNonContigData(TgtPtr, MemInfo.RawMemoryPtr,
+                                   *MemInfo.CopyDesc, AsyncInfo,
+                                   TPR.getEntry());
+      }
+      if (CopyToHost) {
+        ODBG(ADT_Interface) << "Non-contig update FROM: tgt:" << TgtPtr
+                            << " -> hst:" << MemInfo.RawMemoryPtr;
+        Device.retrieveNonContigData(MemInfo.RawMemoryPtr, TgtPtr,
+                                     *MemInfo.CopyDesc, AsyncInfo,
+                                     TPR.getEntry());
+      }
+    };
+
+    // Shared dispatch: only memory with a known size is updated.
+    auto UpdateMemory = [&](MemMappingInfoTy &MemInfo) {
+      if (!MemInfo.RawMemorySize)
+        return;
+      if (MemInfo.CopyDesc)
+        UpdateNonContig(MemInfo);
+      else
+        UpdateContiguous(MemInfo.RawMemoryPtr, *MemInfo.RawMemorySize);
+    };
+
+    auto MapInfos = getMappingInfos(Loc, ArgPtr);
+    for (auto &MapInfo : MapInfos) {
+      if (auto *DescMem = std::get_if<DescAndMemMappingInfoTy>(&MapInfo)) {
+        if (DescMem->Memory)
+          UpdateMemory(*DescMem->Memory);
+      } else if (auto *MemOnly = std::get_if<MemMappingInfoTy>(&MapInfo)) {
+        UpdateMemory(*MemOnly);
+      }
+    }
+  }
+};
+
+/// Returns the address of the first element of \p Desc's `sizes` storage.
+const uint64_t *getMemRefSizes(const MemRefDesc *Desc) {
+  const uint64_t *FirstSize = &Desc->sizes[0];
+  return FirstSize;
+}
+/// Returns the address \p Rank elements past the start of \p Desc's `sizes`
+/// storage, i.e. the strides are read from directly behind the sizes.
+const uint64_t *getMemRefStrides(const MemRefDesc *Desc, unsigned Rank) {
+  const uint64_t *SizesBegin = &Desc->sizes[0];
+  return SizesBegin + Rank;
+}
+
+/// Decodes the descriptor kinds flagged in ArgDesc->Version.
+///
+/// - TGT_ACC_DESC_F18: stores the Fortran descriptor pointer in Flang.
+/// - TGT_ACC_DESC_MEMREF: materializes the MemRef descriptor (pointers,
+///   offset, rank, sizes, strides, element size) into MemRef.
+/// - TGT_ACC_DESC_OPENACC: points Acc at the OpenACC descriptor, which is read
+///   from \p ArgDesc itself, or from sizeof(CFI_cdesc_t *) bytes past it when
+///   a Fortran descriptor was decoded. Combining it with a MemRef descriptor
+///   is reported through REPORT_FATAL.
+///
+/// A null \p ArgDesc yields a default-constructed result.
+ArgDescriptorsTy parseArgDescs(ident_t *Loc, const AccDataDesc *ArgDesc) {
+  ArgDescriptorsTy Parsed;
+  if (!ArgDesc)
+    return Parsed;
+
+  if (ArgDesc->Version & TGT_ACC_DESC_F18) {
+    const auto *F18Desc = (const AccDataDescF18 *)ArgDesc;
+    Parsed.Flang = reinterpret_cast<decltype(Parsed.Flang)>(
+        F18Desc->FortranDescriptor);
+  }
+
+  if (ArgDesc->Version & TGT_ACC_DESC_MEMREF) {
+    const auto *MemRefArg = (const AccDataDescMemRef *)ArgDesc;
+    const MemRefDesc *Raw = MemRefArg->MemRefDescriptor;
+
+    Parsed.MemRef = MaterializedMemRefDesc{};
+    auto &Materialized = *Parsed.MemRef;
+    Materialized.allocatedPtr = Raw->allocatedPtr;
+    Materialized.alignedPtr = Raw->alignedPtr;
+    Materialized.offset = Raw->offset;
+    Materialized.rank = MemRefArg->Rank;
+    Materialized.sizes = getMemRefSizes(Raw);
+    Materialized.strides = getMemRefStrides(Raw, Materialized.rank);
+    Materialized.elementSize = MemRefArg->ElementSize;
+  }
+
+  if (ArgDesc->Version & TGT_ACC_DESC_OPENACC) {
+    // Byte offset of the OpenACC descriptor relative to ArgDesc.
+    int64_t Skip = 0;
+    if (Parsed.Flang) {
+      Skip = sizeof(CFI_cdesc_t *);
+    } else if (Parsed.MemRef) {
+      REPORT_FATAL() << Loc << "Unsupported: MemRef with OpenACC bounds";
+    }
+    const char *Bytes = reinterpret_cast<const char *>(ArgDesc);
+    Parsed.Acc = reinterpret_cast<const AccDataDescOpenACC *>(Bytes + Skip);
+  }
+  return Parsed;
+}
+
+/// Parses \p ArgDesc with parseArgDescs, dumps the result to llvm::dbgs()
+/// through ODBG_IF, runs ArgDescriptorsTy::verify() on it and returns it.
+///
+/// \param Loc      Source location forwarded to parseArgDescs.
+/// \param ArgDesc  Descriptor to decode; parseArgDescs accepts null.
+ArgDescriptorsTy parseAndVerifyArgDescs(ident_t *Loc,
+ const AccDataDesc *ArgDesc) {
+ ArgDescriptorsTy Descs = parseArgDescs(Loc, ArgDesc);
+ // NOTE(review): presumably ODBG_IF only invokes the callback when debug
+ // output is enabled - confirm against the macro definition.
+ ODBG_IF([&]() { Descs.dump(llvm::dbgs()); });
+ Descs.verify();
+ return Descs;
+}
+
+/// Maps one argument of an OpenACC data or compute construct onto \p Device.
+///
+/// Handling depends on the bits set in \p ArgType:
+///  - DEVPTR: only meaningful for kernels. Without a descriptor the
+///    pointer-sized value stored at \p ArgPtr is forwarded as the kernel
+///    argument; with a descriptor the work is delegated to dataBeginDevPtr.
+///  - PRIVATE: a device allocation holding every requested private instance
+///    is created, optionally initialized from the host, passed as the kernel
+///    argument and recorded through KI->addLaunchAlloc.
+///  - LITERAL: a value no wider than a pointer is passed by value.
+///  - otherwise: the host range is entered into the mapping table. With
+///    PTR_AND_OBJ and a parent that is present on the device, the parent's
+///    device-side pointer field is additionally attached to the mapped data.
+///
+/// \param Loc         Source location used in diagnostics.
+/// \param ArgBasePtr  Parent object address; non-null iff PTR_AND_OBJ is set.
+/// \param ArgPtr      Host address of the data (or of the literal/devptr
+///                    slot).
+/// \param ArgSize     Size in bytes when known; when it is not positive the
+///                    mapping is derived from \p ArgDesc instead.
+/// \param ArgType     Bitwise combination of TGT_ACC_MAPTYPE_* flags.
+/// \param ArgName     Mapping name used in debug output.
+/// \param ArgDesc     Optional descriptor for the argument.
+/// \param MapType     Structured mappings request the hold modifier.
+/// \param AsyncInfo   Queue on which transfers are submitted.
+/// \param HDTTMap     Accessor to the host-to-target map, owned by the caller.
+/// \param KI          Kernel argument collector; null outside kernel launches.
+void accTargetDataBegin(ident_t *Loc, void *ArgBasePtr, void *ArgPtr,
+                        int64_t ArgSize, int64_t ArgType, char *ArgName,
+                        AccDataDesc *ArgDesc, AccRefCountingType MapType,
+                        AsyncInfoTy &AsyncInfo, DeviceTy &Device,
+                        MappingInfoTy::HDTTMapAccessorTy &HDTTMap,
+                        KernelArgsMappingInfoTy *KI = nullptr) {
+  // clang-format off
+  ODBG(ADT_Interface)
+    << "targetDataBegin "
+    << "ArgName=" << getNameFromMapping(ArgName) << ", "
+    << "ArgBasePtr=" << ArgBasePtr << ", "
+    << "ArgPtr=" << ArgPtr << ", "
+    << "ArgSize=" << ArgSize << ", "
+    << "ArgType=" << mapTypeToString(ArgType)
+    << " (" << llvm::format_hex(ArgType, 0) << "), "
+    << "ArgDesc=" << ArgDesc;
+  // clang-format on
+
+  // OpenACC 3.4: `if_present` is only valid on `host_data` and `update`
+  // directives.
+  assert(!(ArgType & TGT_ACC_MAPTYPE_IF_PRESENT));
+  assert(!!ArgBasePtr == !!(ArgType & TGT_ACC_MAPTYPE_PTR_AND_OBJ));
+
+  bool IsNoCreate = ArgType & TGT_ACC_MAPTYPE_NO_CREATE;
+
+  // Records the kernel argument for a mapped range: the device address when
+  // the data is present, otherwise (no_create) the host address.
+  auto AddArg = [&](TargetPointerResultTy &TPR, void *TgtArg, void *HstArg) {
+    if (KI) {
+      if (TPR.isPresent()) {
+        KI->addArg(TgtArg);
+      } else {
+        assert(IsNoCreate);
+        KI->addArg(HstArg);
+      }
+    }
+  };
+
+  if (ArgType & TGT_ACC_MAPTYPE_DEVPTR) {
+    if (!KI) {
+      ODBG(ADT_Interface) << "DEVPTR arg in non-kernel context - ignoring.";
+      return;
+    }
+    if (!ArgDesc) {
+      void *LiteralValue = *reinterpret_cast<void ***>(ArgPtr);
+      ODBG(ADT_Interface) << "Got literal device pointer: " << LiteralValue;
+      KI->addArg(LiteralValue);
+      return;
+    }
+    // DEVPTR with a descriptor. The kernel is compiled to receive a device-side
+    // descriptor as a pointer.
+    ODBG(ADT_Interface) << "DEVPTR with descriptor";
+    ArgDescriptorsTy Descs = parseAndVerifyArgDescs(Loc, ArgDesc);
+    Descs.dataBeginDevPtr(Loc, Device, AsyncInfo, HDTTMap, *KI);
+    return;
+  }
+
+  const bool HasFlagTo = ArgType & TGT_ACC_MAPTYPE_TO;
+  if (ArgType & TGT_ACC_MAPTYPE_PRIVATE) {
+    assert(KI && "Private arg should only appear on kernels");
+
+    int64_t BaseAllocSize = ArgSize;
+
+    if (BaseAllocSize <= 0)
+      REPORT_FATAL() << "Invalid private variable size";
+
+    // One instance per gang/worker/vector lane, depending on the requested
+    // privatization levels.
+    int64_t NumPrivate = 1;
+    if ((ArgType & TGT_ACC_MAPTYPE_GANG_PRIVATE))
+      NumPrivate *= KI->KernelArgs.NumGangs[0] * KI->KernelArgs.NumGangs[1] *
+                    KI->KernelArgs.NumGangs[2];
+    if ((ArgType & TGT_ACC_MAPTYPE_WORKER_PRIVATE))
+      NumPrivate *= KI->KernelArgs.NumWorkers;
+    if ((ArgType & TGT_ACC_MAPTYPE_VECTOR_PRIVATE))
+      NumPrivate *= KI->KernelArgs.VectorLength;
+
+    if (ArgDesc) {
+      if (NumPrivate != 1)
+        REPORT_FATAL() << Loc << " Multi-dim private array variable is invalid";
+      ODBG() << "Arg desc on private variable";
+      ArgDescriptorsTy Descs = parseAndVerifyArgDescs(Loc, ArgDesc);
+      Descs.dataBeginPrivate(Loc, ArgPtr, ArgSize, HasFlagTo, Device, AsyncInfo,
+                             HDTTMap, *KI);
+    } else {
+      void *PrivateMemory = Device.allocData(BaseAllocSize * NumPrivate,
+                                             nullptr, TARGET_ALLOC_DEFAULT);
+      ODBG(ADT_Interface) << "Allocated private memory with size "
+                          << BaseAllocSize << " (" << NumPrivate
+                          << " instances) at " << PrivateMemory;
+      // A failed allocation must not be handed to the kernel as a null
+      // private buffer.
+      if (!PrivateMemory)
+        REPORT_FATAL() << Loc << " Failed to allocate private memory";
+
+      if (HasFlagTo) {
+        if (NumPrivate != 1)
+          REPORT_FATAL() << Loc
+                         << " Multi-dim private variable with copy is invalid";
+
+        assert(ArgPtr);
+        Device.submitData(PrivateMemory, ArgPtr, ArgSize, AsyncInfo,
+                          /*Entry=*/nullptr, &HDTTMap);
+      }
+      KI->addArg(PrivateMemory);
+      KI->addLaunchAlloc(PrivateMemory);
+    }
+
+    return;
+  }
+
+  if (ArgType & TGT_ACC_MAPTYPE_LITERAL) {
+    assert(KI && "Literal arg should only appear on kernels");
+    assert(ArgSize && "We need size information to pass in literal args");
+    assert(!ArgDesc);
+    // Our codegen uses indirection for literal args.
+    if (ArgSize > (int)sizeof(void *)) {
+      REPORT_FATAL() << "TODO need to move memory to device";
+      // KI->addArg(DeviceArgPtr);
+      return;
+    }
+    // The value is no wider than a pointer, so it is passed by value. Only
+    // ArgSize bytes are read from the host object: loading a whole pointer
+    // would read past the end of narrower literals. The remaining bytes of
+    // the argument slot are zero.
+    void *LiteralValue = nullptr;
+    auto *SlotBytes = reinterpret_cast<unsigned char *>(&LiteralValue);
+    const auto *HostBytes = static_cast<const unsigned char *>(ArgPtr);
+    for (int64_t Byte = 0; Byte < ArgSize; ++Byte)
+      SlotBytes[Byte] = HostBytes[Byte];
+    KI->addArg(LiteralValue);
+    return;
+  }
+
+  assert(ArgPtr && "We need to have a pointer for data mapping");
+
+  void *ParentAllocation = nullptr;
+  void *DescriptorAddr;
+  bool IsPtrAndObj = ArgType & TGT_ACC_MAPTYPE_PTR_AND_OBJ;
+  if (IsPtrAndObj) {
+    ODBG() << "We got a parent object.";
+    assert(ArgBasePtr);
+    if (Device.getMappingInfo().getTgtPtrBegin(HDTTMap, ArgBasePtr, 1)) {
+      ParentAllocation = ArgBasePtr;
+      DescriptorAddr = ArgBasePtr;
+    } else {
+      // PTR_AND_OBJ but parent not present on device (e.g. enter data copyin
+      // of a pointer component without its parent struct). Data is already
+      // mapped standalone; skip descriptor attach.
+      ODBG() << "Parent not present on device - mapping standalone.";
+      IsPtrAndObj = false;
+      DescriptorAddr = ArgPtr;
+    }
+  } else {
+    DescriptorAddr = ArgPtr;
+  }
+
+  if (ArgSize > 0) {
+    ODBG() << "We got size from the compiler - no descriptor parsing needed.";
+
+    // Enters [ArgPtr, ArgPtr + ArgSize) into the mapping table.
+    auto MapHostRange = [&]() -> TargetPointerResultTy {
+      return Device.getMappingInfo().getTargetPointer(
+          HDTTMap, ArgPtr, ArgPtr, 0, ArgSize, ArgName, HasFlagTo,
+          /*HasFlagAlways=*/false, /*IsImplicit=*/false,
+          /*UpdateRefCount=*/true, /*HasCloseModifier=*/false,
+          /*HasPresentModifier=*/false,
+          /*HasHoldModifier=*/MapType == AccRefCountingType::Structured,
+          IsNoCreate, AsyncInfo, /*OwnedTPR=*/nullptr,
+          /*ReleaseHDTTMap=*/false);
+    };
+
+    if (IsPtrAndObj) {
+      // Map the pointee data, then release entry lock before looking up
+      // the parent for pointer attachment.
+      void *MemTgtPtr = nullptr;
+      {
+        TargetPointerResultTy TPR = MapHostRange();
+        AddArg(TPR, TPR.TargetPointer, ArgPtr);
+        if (TPR.isPresent())
+          MemTgtPtr = TPR.TargetPointer;
+        else
+          assert(IsNoCreate);
+      }
+
+      // Update the parent's pointer field on device.
+      if (MemTgtPtr) {
+        LookupResult ParentLR = Device.getMappingInfo().lookupMapping(
+            HDTTMap, DescriptorAddr, sizeof(void *));
+        if (ParentLR.TPR.getEntry()) {
+          void **HstPtrAddr = reinterpret_cast<void **>(DescriptorAddr);
+          // Device address of the pointer field: same offset into the
+          // parent's device allocation as on the host.
+          uintptr_t TgtDescAddr = ParentLR.TPR.getEntry()->TgtPtrBegin +
+                                  (reinterpret_cast<uintptr_t>(DescriptorAddr) -
+                                   ParentLR.TPR.getEntry()->HstPtrBegin);
+          void **TgtPtrAddr = reinterpret_cast<void **>(TgtDescAddr);
+
+          // The host field may point before ArgPtr; keep the same distance
+          // between the device pointee base and the mapped device data.
+          void *HstPteeBase = *HstPtrAddr;
+          void *TgtPteeBase = reinterpret_cast<void *>(
+              reinterpret_cast<uintptr_t>(MemTgtPtr) -
+              (reinterpret_cast<uintptr_t>(ArgPtr) -
+               reinterpret_cast<uintptr_t>(HstPteeBase)));
+
+          if (ParentLR.TPR.getEntry()->addShadowPointer(
+                  ShadowPtrInfoTy{HstPtrAddr, TgtPtrAddr, TgtPteeBase,
+                                  static_cast<int64_t>(sizeof(void *))})) {
+            ODBG() << "PTR_AND_OBJ attach: device field " << TgtPtrAddr
+                   << " -> " << TgtPteeBase;
+            void *&Buf = AsyncInfo.getVoidPtrLocation();
+            Buf = TgtPteeBase;
+            Device.submitData(TgtPtrAddr, &Buf, sizeof(void *), AsyncInfo,
+                              ParentLR.TPR.getEntry(), &HDTTMap);
+            ParentLR.TPR.getEntry()->addEventIfNecessary(Device, AsyncInfo);
+          }
+        }
+      }
+
+    } else {
+      TargetPointerResultTy TPR = MapHostRange();
+      AddArg(TPR, TPR.TargetPointer, ArgPtr);
+    }
+    return;
+  }
+
+  // No usable size from the compiler: derive the mapping from the descriptor.
+  ArgDescriptorsTy Descs = parseAndVerifyArgDescs(Loc, ArgDesc);
+  Descs.dataBegin(Loc, ArgPtr, DescriptorAddr, ParentAllocation, IsPtrAndObj,
+                  ArgName, HasFlagTo, IsNoCreate, MapType, AsyncInfo, Device,
+                  HDTTMap, KI);
+}
+
+/// Returns true when a one-byte lookup at \p Ptr finds a mapping on
+/// \p Device. No reference count is updated by the lookup.
+bool isPresent(DeviceTy &Device, void *Ptr) {
+  auto &Mappings = Device.getMappingInfo();
+  TargetPointerResultTy Lookup = Mappings.getTgtPtrBegin(
+      Ptr, /*Size=*/1, /*UpdateRefCount=*/false, /*UseHoldRefCount=*/false);
+  const bool Found = Lookup.isPresent();
+  return Found;
+}
+
+/// Ends the mapping of one argument of an OpenACC data or compute construct.
+///
+/// Kernel-only arguments (private, literal, devptr) are ignored. For
+/// everything else the mapping is ended either directly with \p ArgSize when
+/// it is positive, or through the argument's descriptors otherwise.
+/// TGT_ACC_MAPTYPE_FINALIZE forces deletion and TGT_ACC_MAPTYPE_FROM selects
+/// copy-out on delete.
+///
+/// \param ArgBasePtr  Parent object address; non-null iff PTR_AND_OBJ is set.
+/// \param ArgPtr      Host address of the mapped data.
+/// \param MapType     Reference-counting flavour of the construct.
+void accTargetDataEnd(ident_t *Loc, void *ArgBasePtr, void *ArgPtr,
+                      int64_t ArgSize, int64_t ArgType, char *ArgName,
+                      AccDataDesc *ArgDesc, AccRefCountingType MapType,
+                      AsyncInfoTy &AsyncInfo, DeviceTy &Device) {
+  // clang-format off
+  ODBG(ADT_Interface)
+    << "targetDataEnd "
+    << "ArgName=" << getNameFromMapping(ArgName) << ", "
+    << "ArgBasePtr=" << ArgBasePtr << ", "
+    << "ArgPtr=" << ArgPtr << ", "
+    << "ArgSize=" << ArgSize << ", "
+    << "ArgType=" << mapTypeToString(ArgType)
+    << " (" << llvm::format_hex(ArgType, 0) << "), "
+    << "ArgDesc=" << ArgDesc;
+  // clang-format on
+
+  // OpenACC 3.4: `if_present` is only valid on `host_data` and `update`
+  // directives.
+  assert(!(ArgType & TGT_ACC_MAPTYPE_IF_PRESENT));
+
+  assert(!!ArgBasePtr == !!(ArgType & TGT_ACC_MAPTYPE_PTR_AND_OBJ));
+
+  // These types are only for kernel launches
+  const bool IsKernelOnlyArg = (ArgType & TGT_ACC_MAPTYPE_VECTOR_PRIVATE) ||
+                               (ArgType & TGT_ACC_MAPTYPE_GANG_PRIVATE) ||
+                               (ArgType & TGT_ACC_MAPTYPE_WORKER_PRIVATE) ||
+                               (ArgType & TGT_ACC_MAPTYPE_LITERAL) ||
+                               (ArgType & TGT_ACC_MAPTYPE_DEVPTR) ||
+                               (ArgType & TGT_ACC_MAPTYPE_PRIVATE);
+  if (IsKernelOnlyArg) {
+    ODBG(ADT_Interface) << "Kernel launch argument - ignoring.";
+    return;
+  }
+
+  assert(ArgPtr && "We need to have a pointer for data mapping");
+
+  // Default to a standalone mapping; switch to the parent object only when it
+  // is actually present on the device.
+  void *ParentAllocation = nullptr;
+  void *DescriptorAddr = ArgPtr;
+  bool IsPtrAndObj = ArgType & TGT_ACC_MAPTYPE_PTR_AND_OBJ;
+  if (IsPtrAndObj) {
+    ODBG(ADT_Mapping) << "We got a parent object.";
+    assert(ArgBasePtr);
+    if (!isPresent(Device, ArgBasePtr)) {
+      // PTR_AND_OBJ but parent not present on device (e.g. enter data copyin
+      // of a pointer component without its parent struct). Data is already
+      // mapped standalone; skip descriptor attach.
+      ODBG(ADT_Mapping) << "Parent not present on device - mapping standalone.";
+      IsPtrAndObj = false;
+    } else {
+      ParentAllocation = ArgBasePtr;
+      DescriptorAddr = ArgBasePtr;
+    }
+  }
+
+  const bool ForceDelete = ArgType & TGT_ACC_MAPTYPE_FINALIZE;
+  const bool IsNoCreate = ArgType & TGT_ACC_MAPTYPE_NO_CREATE;
+  const bool HasFlagFrom = ArgType & TGT_ACC_MAPTYPE_FROM;
+  const AccCopyOutType CopyType =
+      HasFlagFrom ? AccCopyOutType::OnDelete : AccCopyOutType::Never;
+
+  if (ArgSize <= 0) {
+    ArgDescriptorsTy Descs = parseAndVerifyArgDescs(Loc, ArgDesc);
+    Descs.dataEnd(Loc, ArgPtr, DescriptorAddr, ParentAllocation, ArgType,
+                  ForceDelete, IsNoCreate, CopyType, MapType, AsyncInfo,
+                  Device);
+    return;
+  }
+
+  ODBG(ADT_Mapping)
+      << "We got size from the compiler - no descriptor parsing needed.";
+
+  handleSingleDataEnd<int64_t>(Loc, DescriptorAddr, ArgPtr, ArgSize,
+                               ForceDelete, IsNoCreate, CopyType, MapType,
+                               AsyncInfo, Device);
+}
+
+/// Executes an `update` for one argument.
+///
+/// Literal, private and devptr arguments as well as null host pointers are
+/// ignored. With a positive \p ArgSize the mapped range is looked up (it must
+/// fully contain the host range) and data is copied host -> device for
+/// TGT_ACC_MAPTYPE_TO and device -> host for TGT_ACC_MAPTYPE_FROM. Without a
+/// size the update is driven by the argument's descriptors. A missing mapping
+/// is skipped under TGT_ACC_MAPTYPE_IF_PRESENT and reported through
+/// REPORT_FATAL otherwise.
+void accTargetDataUpdate(ident_t *Loc, void *ArgBasePtr, void *ArgPtr,
+                         int64_t ArgSize, int64_t ArgType, char *ArgName,
+                         AccDataDesc *ArgDesc, AsyncInfoTy &AsyncInfo,
+                         DeviceTy &Device) {
+  // clang-format off
+  ODBG(ADT_Interface)
+    << "update "
+    << "ArgName=" << getNameFromMapping(ArgName) << ", "
+    << "ArgPtr=" << ArgPtr << ", "
+    << "ArgSize=" << ArgSize << ", "
+    << "ArgType=" << mapTypeToString(ArgType)
+    << " (" << llvm::format_hex(ArgType, 0) << "), "
+    << "ArgDesc=" << ArgDesc;
+  // clang-format on
+
+  const bool IsKernelOnlyArg = (ArgType & TGT_ACC_MAPTYPE_LITERAL) ||
+                               (ArgType & TGT_ACC_MAPTYPE_PRIVATE) ||
+                               (ArgType & TGT_ACC_MAPTYPE_DEVPTR);
+  if (IsKernelOnlyArg || !ArgPtr)
+    return;
+
+  if (ArgSize <= 0) {
+    // No size from the compiler: let the descriptors drive the update.
+    ArgDescriptorsTy Descs = parseAndVerifyArgDescs(Loc, ArgDesc);
+    Descs.dataUpdate(Loc, ArgPtr, ArgType, AsyncInfo, Device);
+    return;
+  }
+
+  TargetPointerResultTy Mapping = Device.getMappingInfo().getTgtPtrBegin(
+      ArgPtr, ArgSize, /*UpdateRefCount=*/false, /*UseHoldRefCount=*/false,
+      /*MustContain=*/true);
+  if (!Mapping.isPresent()) {
+    if (ArgType & TGT_ACC_MAPTYPE_IF_PRESENT) {
+      ODBG(ADT_Interface) << "Not present, if_present - skipping update.";
+      return;
+    }
+    REPORT_FATAL() << "Device mapping does not exist for update at " << Loc;
+  }
+
+  void *TgtPtr = Mapping.TargetPointer;
+  const bool CopyToDevice = ArgType & TGT_ACC_MAPTYPE_TO;
+  const bool CopyToHost = ArgType & TGT_ACC_MAPTYPE_FROM;
+  if (CopyToDevice) {
+    ODBG(ADT_Interface) << "Update TO: " << ArgSize << " bytes hst:" << ArgPtr
+                        << " -> tgt:" << TgtPtr;
+    Device.submitData(TgtPtr, ArgPtr, ArgSize, AsyncInfo, Mapping.getEntry());
+  }
+  if (CopyToHost) {
+    ODBG(ADT_Interface) << "Update FROM: " << ArgSize
+                        << " bytes tgt:" << TgtPtr << " -> hst:" << ArgPtr;
+    Device.retrieveData(ArgPtr, TgtPtr, ArgSize, AsyncInfo,
+                        Mapping.getEntry());
+  }
+}
+
+/// Resolves the device for \p DeviceType and invokes
+/// Callback(Device, AsyncInfo).
+///
+/// For AccAsyncSync a function-local AsyncInfoTy is used; for any other
+/// \p Async value the AsyncInfoTy comes from a QueueAsyncInfoWrapperTy built
+/// for that queue. Failure to obtain the device is reported through
+/// REPORT_FATAL.
+template <typename T>
+void withDeviceAndQueue(int64_t DeviceType, int64_t Async, T Callback) {
+  const auto RequestedType = static_cast<acc_device_t>(DeviceType);
+  llvm::Expected<DeviceTy &> MaybeDevice = DM->getDevice(RequestedType);
+  if (!MaybeDevice) {
+    REPORT_FATAL() << "Failed to get device: "
+                   << toString(MaybeDevice.takeError());
+  }
+
+  DeviceTy &Device = *MaybeDevice;
+
+  ODBG(ADT_Interface) << "with device type " << DeviceType << " and async "
+                      << asyncToString(Async);
+
+  if (Async != AccAsyncSync) {
+    QueueAsyncInfoWrapperTy QueueAsyncInfo(Device, Async);
+    AsyncInfoTy &AsyncInfo = QueueAsyncInfo;
+    Callback(Device, AsyncInfo);
+    return;
+  }
+
+  AsyncInfoTy AsyncInfo(Device);
+  Callback(Device, AsyncInfo);
+}
+
+/// Invokes \p Func once per argument, forwarding the per-argument entries of
+/// the parallel arrays followed by \p Args.
+///
+/// \param Func        Callable taking (Loc, BasePtr, Ptr, Size, Type, Name,
+///                    Desc, Args...).
+/// \param Increasing  Visit arguments from 0 to ArgNum-1 when true, from
+///                    ArgNum-1 down to 0 otherwise.
+/// \param ArgNum      Number of entries in each array.
+/// \param ArgNames    May be null; a null name is then passed for each
+///                    argument.
+/// \param ArgMappers  Must be null (asserted).
+/// \param ArgDescs    May be null; a null descriptor is then passed for each
+///                    argument, which the per-argument handlers accept.
+template <typename FuncTy, typename... ArgsTy>
+void forEachArg(FuncTy Func, bool Increasing, ident_t *Loc, uint32_t ArgNum,
+                void **ArgBasePtrs, void **ArgPtrs, int64_t *ArgSizes,
+                int64_t *ArgTypes, char **ArgNames, void **ArgMappers,
+                AccDataDesc **ArgDescs, ArgsTy &&...Args) {
+  assert(!ArgMappers && "we currently do not generate mappers");
+  ODBG(ADT_Interface) << "Got " << ArgNum << " args at " << Loc;
+  // Count in a wide signed type so the arithmetic never depends on unsigned
+  // wrap-around (ArgNum == 0) or on narrowing ArgNum to 32 signed bits.
+  const int64_t Count = ArgNum;
+  for (int64_t Visited = 0; Visited < Count; ++Visited) {
+    const int64_t I = Increasing ? Visited : Count - 1 - Visited;
+    ODBG(ADT_Interface) << "Handling arg #" << I;
+    char *Name = ArgNames ? ArgNames[I] : nullptr;
+    AccDataDesc *Desc = ArgDescs ? ArgDescs[I] : nullptr;
+    Func(Loc, ArgBasePtrs[I], ArgPtrs[I], ArgSizes[I], ArgTypes[I], Name, Desc,
+         Args...);
+  }
+}
+} // namespace
+
+namespace llvm::acc::target {
+/// Maps a single host range on the default device type with dynamic reference
+/// counting and returns the value accTargetDataBegin recorded as its one
+/// kernel argument.
+///
+/// No source location, name or descriptor is passed along. Exactly one
+/// recorded argument is expected (asserted).
+void *accDataEnter(void *ArgBasePtr, void *ArgPtr, int64_t ArgSize,
+                   int64_t ArgType, int64_t Async) {
+  void *Mapped = nullptr;
+  auto EnterOne = [&](DeviceTy &Device, AsyncInfoTy &AsyncInfo) {
+    {
+      // A throw-away collector is used only to capture the resulting pointer.
+      // The scope ends before the mappings are dumped so the exclusive
+      // accessor is gone by then.
+      AccKernelArgsTy EmptyKernelArgs = {};
+      KernelArgsMappingInfoTy Collector{EmptyKernelArgs, {}, {}, {}};
+      MappingInfoTy::HDTTMapAccessorTy HDTTMap =
+          Device.getMappingInfo().HostDataToTargetMap.getExclusiveAccessor();
+      accTargetDataBegin(/*Loc=*/nullptr, ArgBasePtr, ArgPtr, ArgSize, ArgType,
+                         /*ArgName=*/nullptr, /*ArgDesc=*/nullptr,
+                         AccRefCountingType::Dynamic, AsyncInfo, Device,
+                         HDTTMap, &Collector);
+      assert(Collector.Args.size() == 1);
+      Mapped = Collector.Args[0];
+    }
+    dumpTargetPointerMappings(nullptr, Device);
+  };
+  withDeviceAndQueue(acc_device_default, Async, EnterOne);
+  return Mapped;
+}
+} // namespace llvm::acc::target
+
+/// Runtime entry point `__tgt_acc_declare`: maps all arguments in increasing
+/// order with structured reference counting and no kernel argument collector,
+/// then dumps the device's pointer mappings. \p Desc must be null (asserted).
+EXTERN void __tgt_acc_declare(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+                              uint32_t ArgNum, void **ArgBasePtrs,
+                              void **ArgPtrs, int64_t *ArgSizes,
+                              int64_t *ArgTypes, char **ArgNames,
+                              void **ArgMappers, AccDataDesc **ArgDescs,
+                              int64_t Async, __tgt_bin_desc *Desc) {
+  FUNC_LOGGER(Loc);
+  assert(!Desc);
+
+  auto MapAllArgs = [&](DeviceTy &Device, AsyncInfoTy &AsyncInfo) {
+    {
+      // The exclusive accessor only lives for the duration of the mapping.
+      MappingInfoTy::HDTTMapAccessorTy HDTTMap =
+          Device.getMappingInfo().HostDataToTargetMap.getExclusiveAccessor();
+      forEachArg(accTargetDataBegin, /*Increasing=*/true, Loc, ArgNum,
+                 ArgBasePtrs, ArgPtrs, ArgSizes, ArgTypes, ArgNames, ArgMappers,
+                 ArgDescs, AccRefCountingType::Structured, AsyncInfo, Device,
+                 HDTTMap, /*KI=*/nullptr);
+    }
+    dumpTargetPointerMappings(Loc, Device);
+  };
+  withDeviceAndQueue(DeviceType, Async, MapAllArgs);
+}
+
+/// Runtime entry point `__tgt_acc_data_update`: runs accTargetDataUpdate on
+/// every argument, in increasing order, on the device and queue selected by
+/// \p DeviceType and \p Async.
+EXTERN void __tgt_acc_data_update(ident_t *Loc, int64_t Flags,
+                                  int64_t DeviceType, uint32_t ArgNum,
+                                  void **ArgBasePtrs, void **ArgPtrs,
+                                  int64_t *ArgSizes, int64_t *ArgTypes,
+                                  char **ArgNames, void **ArgMappers,
+                                  AccDataDesc **ArgDescs, int64_t Async) {
+  FUNC_LOGGER(Loc);
+  auto UpdateAllArgs = [&](DeviceTy &Device, AsyncInfoTy &AsyncInfo) {
+    forEachArg(accTargetDataUpdate, /*Increasing=*/true, Loc, ArgNum,
+               ArgBasePtrs, ArgPtrs, ArgSizes, ArgTypes, ArgNames, ArgMappers,
+               ArgDescs, AsyncInfo, Device);
+  };
+  withDeviceAndQueue(DeviceType, Async, UpdateAllArgs);
+}
+
+/// Runtime entry point `__tgt_acc_data_enter`: maps all arguments in
+/// increasing order with dynamic reference counting and no kernel argument
+/// collector, then dumps the device's pointer mappings.
+EXTERN void __tgt_acc_data_enter(ident_t *Loc, int64_t Flags,
+                                 int64_t DeviceType, uint32_t ArgNum,
+                                 void **ArgBasePtrs, void **ArgPtrs,
+                                 int64_t *ArgSizes, int64_t *ArgTypes,
+                                 char **ArgNames, void **ArgMappers,
+                                 AccDataDesc **ArgDescs, int64_t Async) {
+  FUNC_LOGGER(Loc);
+  auto EnterAllArgs = [&](DeviceTy &Device, AsyncInfoTy &AsyncInfo) {
+    {
+      // The exclusive accessor only lives for the duration of the mapping.
+      MappingInfoTy::HDTTMapAccessorTy HDTTMap =
+          Device.getMappingInfo().HostDataToTargetMap.getExclusiveAccessor();
+      forEachArg(accTargetDataBegin, /*Increasing=*/true, Loc, ArgNum,
+                 ArgBasePtrs, ArgPtrs, ArgSizes, ArgTypes, ArgNames, ArgMappers,
+                 ArgDescs, AccRefCountingType::Dynamic, AsyncInfo, Device,
+                 HDTTMap, /*KI=*/nullptr);
+    }
+    dumpTargetPointerMappings(Loc, Device);
+  };
+  withDeviceAndQueue(DeviceType, Async, EnterAllArgs);
+}
+
+/// Runtime entry point `__tgt_acc_data_exit`: ends the mapping of all
+/// arguments in decreasing order with dynamic reference counting, then dumps
+/// the device's pointer mappings.
+EXTERN void __tgt_acc_data_exit(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+                                uint32_t ArgNum, void **ArgBasePtrs,
+                                void **ArgPtrs, int64_t *ArgSizes,
+                                int64_t *ArgTypes, char **ArgNames,
+                                void **ArgMappers, AccDataDesc **ArgDescs,
+                                int64_t Async) {
+  FUNC_LOGGER(Loc);
+  auto ExitAllArgs = [&](DeviceTy &Device, AsyncInfoTy &AsyncInfo) {
+    forEachArg(accTargetDataEnd, /*Increasing=*/false, Loc, ArgNum, ArgBasePtrs,
+               ArgPtrs, ArgSizes, ArgTypes, ArgNames, ArgMappers, ArgDescs,
+               AccRefCountingType::Dynamic, AsyncInfo, Device);
+    dumpTargetPointerMappings(Loc, Device);
+  };
+  withDeviceAndQueue(DeviceType, Async, ExitAllArgs);
+}
+
+/// Runtime entry point `__tgt_acc_data_begin`: maps all arguments in
+/// increasing order with structured reference counting and no kernel argument
+/// collector, then dumps the device's pointer mappings.
+EXTERN void __tgt_acc_data_begin(ident_t *Loc, int64_t Flags,
+                                 int64_t DeviceType, uint32_t ArgNum,
+                                 void **ArgBasePtrs, void **ArgPtrs,
+                                 int64_t *ArgSizes, int64_t *ArgTypes,
+                                 char **ArgNames, void **ArgMappers,
+                                 AccDataDesc **ArgDescs, int64_t Async) {
+  FUNC_LOGGER(Loc);
+  auto BeginAllArgs = [&](DeviceTy &Device, AsyncInfoTy &AsyncInfo) {
+    {
+      // The exclusive accessor only lives for the duration of the mapping.
+      MappingInfoTy::HDTTMapAccessorTy HDTTMap =
+          Device.getMappingInfo().HostDataToTargetMap.getExclusiveAccessor();
+      forEachArg(accTargetDataBegin, /*Increasing=*/true, Loc, ArgNum,
+                 ArgBasePtrs, ArgPtrs, ArgSizes, ArgTypes, ArgNames, ArgMappers,
+                 ArgDescs, AccRefCountingType::Structured, AsyncInfo, Device,
+                 HDTTMap, /*KI=*/nullptr);
+    }
+    dumpTargetPointerMappings(Loc, Device);
+  };
+  withDeviceAndQueue(DeviceType, Async, BeginAllArgs);
+}
+
+/// Runtime entry point `__tgt_acc_data_end`: ends the mapping of all arguments
+/// in decreasing order with structured reference counting, then dumps the
+/// device's pointer mappings.
+EXTERN void __tgt_acc_data_end(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+                               uint32_t ArgNum, void **ArgBasePtrs,
+                               void **ArgPtrs, int64_t *ArgSizes,
+                               int64_t *ArgTypes, char **ArgNames,
+                               void **ArgMappers, AccDataDesc **ArgDescs,
+                               int64_t Async) {
+  FUNC_LOGGER(Loc);
+  auto EndAllArgs = [&](DeviceTy &Device, AsyncInfoTy &AsyncInfo) {
+    forEachArg(accTargetDataEnd, /*Increasing=*/false, Loc, ArgNum, ArgBasePtrs,
+               ArgPtrs, ArgSizes, ArgTypes, ArgNames, ArgMappers, ArgDescs,
+               AccRefCountingType::Structured, AsyncInfo, Device);
+    dumpTargetPointerMappings(Loc, Device);
+  };
+  withDeviceAndQueue(DeviceType, Async, EndAllArgs);
+}
+
+/// Resolves the device-side address of the offload entry registered for host
+/// entry \p HostPtr on \p Device.
+///
+/// The host entry is looked up with llvm::offload::getTableMap; the address is
+/// then read from the target table selected by Device.DeviceID at the entry's
+/// index. An unknown host entry is reported through REPORT_FATAL.
+///
+/// \returns The device entry address, or nullptr if the host entry is unknown
+///          and REPORT_FATAL returned.
+void *getDeviceEntryPtr(void *HostPtr, DeviceTy &Device) {
+  int32_t DeviceId = Device.DeviceID;
+  TableMap *TM = llvm::offload::getTableMap(HostPtr);
+  // The lookup result used to be dereferenced unconditionally; a host pointer
+  // without a registered entry must not crash on a null table map.
+  if (!TM) {
+    REPORT_FATAL() << "Host entry " << HostPtr
+                   << " does not have a matching target entry";
+    return nullptr;
+  }
+
+  __tgt_target_table *TargetTable = nullptr;
+  {
+    // The translation table is read under PM->TrlTblMtx.
+    std::lock_guard<std::mutex> TrlTblLock(PM->TrlTblMtx);
+    assert(TM->Table->TargetsTable.size() > (size_t)DeviceId &&
+           "Not expecting a device ID outside the table's bounds!");
+    TargetTable = TM->Table->TargetsTable[DeviceId];
+  }
+  assert(TargetTable && "Global data has not been mapped\n");
+
+  void *TgtEntryPtr = TargetTable->EntriesBegin[TM->Index].Address;
+  ODBG(ADT_Kernel) << "Launching target execution "
+                   << TargetTable->EntriesBegin[TM->Index].SymbolName
+                   << " with pointer " << TgtEntryPtr << " (index=" << TM->Index
+                   << ").";
+  return TgtEntryPtr;
+}
+
+// Runtime entry point that launches the device kernel registered for host
+// entry `Kernel`.
+//
+// Steps, in order: map every kernel argument with accTargetDataBegin
+// (structured reference counting) while collecting the launch arguments,
+// launch the kernel, end the argument mappings in reverse order with
+// accTargetDataEnd, and register a post-processing function that deletes the
+// launch allocations recorded during mapping.
+//
+// `Desc` must be null (asserted). The function always returns
+// OFFLOAD_SUCCESS; a failed launch is reported through REPORT_FATAL.
+EXTERN int __tgt_acc_kernel(ident_t *Loc, void *Kernel, int64_t Flags,
+ int64_t DeviceType, AccKernelArgsTy *Args,
+ int64_t Async, const char *KernelName,
+ __tgt_bin_desc *Desc) {
+ FUNC_LOGGER(Loc);
+ assert(!Desc);
+
+ withDeviceAndQueue(
+ DeviceType, Async, [&](DeviceTy &Device, AsyncInfoTy &AsyncInfo) {
+ MappingInfoTy::HDTTMapAccessorTy HDTTMap =
+ Device.getMappingInfo().HostDataToTargetMap.getExclusiveAccessor();
+
+ // TgtArgs and TgtOffsets are never filled in this function; the kernel
+ // arguments travel through LaunchParams (see DeviceArgs.ArgPtrs below).
+ SmallVector<void *> TgtArgs;
+ SmallVector<ptrdiff_t> TgtOffsets;
+ KernelArgsMappingInfoTy KI{*Args, {}, {}, {}};
+ forEachArg(accTargetDataBegin, /*Increasing=*/true, Loc, Args->ArgNum,
+ Args->ArgBasePtrs, Args->ArgPtrs, Args->ArgSizes,
+ Args->ArgTypes, Args->ArgNames, Args->ArgMappers,
+ Args->ArgDescs, AccRefCountingType::Structured, AsyncInfo,
+ Device, HDTTMap, &KI);
+ // Mapping is done: the accessor is destroyed before the launch and before
+ // the data-end calls below, which do not receive it.
+ // NOTE(review): presumably this releases the exclusive lock on the
+ // host-to-target map - confirm.
+ HDTTMap.destroy();
+
+ KernelLaunchParamsTy LaunchParams = KI.getLaunchArgs();
+ KernelArgsTy DeviceArgs = {0};
+ DeviceArgs.Version = 4;
+ // NOTE(review): ArgPtrs carries a pointer to LaunchParams rather than an
+ // array of argument pointers; presumably Flags.IsCUDA tells the plugin to
+ // interpret it that way - confirm.
+ DeviceArgs.ArgPtrs = reinterpret_cast<void **>(&LaunchParams);
+ DeviceArgs.Flags.IsCUDA = true;
+ DeviceArgs.DynCGroupMem = Args->SmemSize;
+
+ // Gangs become the block counts; vector length and worker count become
+ // thread-limit dimensions 0 and 1.
+ DeviceArgs.UserNumBlocks[0] = Args->NumGangs[0];
+ DeviceArgs.UserNumBlocks[1] = Args->NumGangs[1];
+ DeviceArgs.UserNumBlocks[2] = Args->NumGangs[2];
+ DeviceArgs.UserThreadLimit[0] = Args->VectorLength;
+ DeviceArgs.UserThreadLimit[1] = Args->NumWorkers;
+ DeviceArgs.UserThreadLimit[2] = 1;
+
+ void *TgtEntryPtr = getDeviceEntryPtr(Kernel, Device);
+ ODBG(ADT_Interface)
+ << "Launching device kernel " << KernelName
+ << " with entry hst: " << Kernel << " tgt: " << TgtEntryPtr
+ << " with " << KI.Args.size() << " (" << Args->ArgNum << ") args";
+ ODBG(ADT_Interface) << "NumGangs " << Args->NumGangs[0] << ", "
+ << Args->NumGangs[1] << ", " << Args->NumGangs[2];
+ ODBG(ADT_Interface) << "VectorLength " << Args->VectorLength;
+ ODBG(ADT_Interface) << "NumWorkers " << Args->NumWorkers;
+ ODBG(ADT_Interface) << "SmemSize " << Args->SmemSize;
+
+ // Every collected argument occupies one pointer-sized slot.
+ assert(KI.Args.size() * sizeof(void *) == LaunchParams.Size);
+ for (unsigned I = 0; I < KI.Args.size(); I++)
+ ODBG(ADT_Interface) << "Arg #" << I << ": " << KI.Args[I];
+
+ if (Device.launchKernel(TgtEntryPtr, TgtArgs.data(), TgtOffsets.data(),
+ DeviceArgs, nullptr,
+ AsyncInfo) != OFFLOAD_SUCCESS)
+ REPORT_FATAL() << "Kernel launch failed";
+
+ // End the argument mappings in the reverse of the order they were begun.
+ forEachArg(accTargetDataEnd, /*Increasing=*/false, Loc, Args->ArgNum,
+ Args->ArgBasePtrs, Args->ArgPtrs, Args->ArgSizes,
+ Args->ArgTypes, Args->ArgNames, Args->ArgMappers,
+ Args->ArgDescs, AccRefCountingType::Structured, AsyncInfo,
+ Device);
+
+ dumpTargetPointerMappings(Loc, Device);
+
+ // The deleter copies KI.LaunchAllocs by value and keeps a pointer to the
+ // device, so it does not refer to KI once this lambda has returned. It
+ // stops at the first deleteData failure and returns that error code.
+ auto LaunchAllocDeleter = [Device = &Device,
+ LaunchAllocs = KI.LaunchAllocs]() {
+ for (void *LaunchAlloc : LaunchAllocs)
+ if (int32_t Ret = Device->deleteData(LaunchAlloc);
+ Ret != OFFLOAD_SUCCESS)
+ return Ret;
+ return OFFLOAD_SUCCESS;
+ };
+ AsyncInfo.addPostProcessingFunction(LaunchAllocDeleter);
+ });
+ return OFFLOAD_SUCCESS;
+}
+
+/// Runtime entry point `__tgt_acc_get_deviceptr`: returns the result of a
+/// zero-size getTgtPtrBegin lookup of \p HostPtr on the device returned by
+/// DM->getDevice(). \p BasePtr and \p Flags are only logged.
+EXTERN void *__tgt_acc_get_deviceptr(ident_t *Loc, void *BasePtr, int64_t Flags,
+                                     void *HostPtr) {
+  FUNC_LOGGER(Loc);
+  ODBG(ADT_Interface) << Loc << "BasePtr: " << BasePtr << ", "
+                      << "Flags: " << llvm::format_hex(Flags, 0) << ", "
+                      << "HostPtr: " << HostPtr;
+
+  llvm::Expected<DeviceTy &> MaybeDevice = DM->getDevice();
+  if (!MaybeDevice) {
+    REPORT_FATAL() << "Failed to get device: "
+                   << toString(MaybeDevice.takeError());
+  }
+  DeviceTy &Device = *MaybeDevice;
+
+  // The lookup is done while holding the exclusive accessor to the map.
+  auto &Mappings = Device.getMappingInfo();
+  MappingInfoTy::HDTTMapAccessorTy HDTTMap =
+      Mappings.HostDataToTargetMap.getExclusiveAccessor();
+  void *DevicePtr = nullptr;
+  DevicePtr = Mappings.getTgtPtrBegin(HDTTMap, HostPtr, 0);
+
+  ODBG(ADT_Interface) << "DevicePtr: " << DevicePtr;
+
+  return DevicePtr;
+}
+
+/// Runtime entry point `__tgt_acc_set_default_async`: stores the default
+/// async queue in icv::AccDefaultAsyncVar.
+///
+/// AccAsyncDefault selects AccAsyncDefaultQueue. AccAsyncSync, AccAsyncNoval
+/// and any other negative value are rejected through REPORT_FATAL.
+EXTERN void __tgt_acc_set_default_async(ident_t *Loc, int64_t Async) {
+  FUNC_LOGGER(Loc);
+  ODBG(ADT_Interface) << Loc << ": Set async=" << asyncToString(Async);
+
+  const bool IsSync = Async == AccAsyncSync;
+  const bool IsNoval = Async == AccAsyncNoval;
+  const bool IsDefault = Async == AccAsyncDefault;
+
+  // The checks are mutually exclusive and ordered: the special values are
+  // classified before the generic negative-queue test.
+  if (IsSync) {
+    REPORT_FATAL() << Loc
+                   << "The default queue cannot be set to `acc_async_sync'";
+  } else if (IsNoval) {
+    REPORT_FATAL() << Loc
+                   << "The default queue cannot be set to `acc_async_noval'";
+  } else if (!IsDefault && Async < 0) {
+    REPORT_FATAL() << Loc << "Negative queues are invalid";
+  }
+
+  int64_t Queue = Async;
+  if (!IsSync && !IsNoval && IsDefault)
+    Queue = AccAsyncDefaultQueue;
+
+  icv::AccDefaultAsyncVar = Queue;
+}
+
+/// Runtime entry point for the 'acc set device_num' directive.
+///
+/// \param Loc         Source location, used for logging.
+/// \param Flags       Not used by this function.
+/// \param DeviceType  Device type the number applies to; zero means "all".
+/// \param DeviceNum   Requested device number; negative values select 0.
+EXTERN void __tgt_acc_set_device_num(ident_t *Loc, int64_t Flags,
+                                     int64_t DeviceType, int64_t DeviceNum) {
+  FUNC_LOGGER(Loc);
+
+  // OpenACC 3.3: a negative device_num reverts to the default behavior, which
+  // is implementation-defined. This implementation falls back to device 0.
+  const int64_t EffectiveNum = DeviceNum < 0 ? 0 : DeviceNum;
+
+  // OpenACC 3.3: if device_type is zero or the clause does not appear, the
+  // selected device number is used for all attached accelerator types.
+  const bool AppliesToAllTypes = DeviceType == 0;
+  if (AppliesToAllTypes) {
+    DM->setAllDeviceId(EffectiveNum);
+  } else {
+    DM->setDeviceType(static_cast<acc_device_t>(DeviceType));
+    DM->setDeviceId(EffectiveNum);
+  }
+}
+
+/// Runtime entry point for the 'acc set device_type' directive: forwards
+/// \p DeviceType, converted to acc_device_t, to DM->setDeviceType().
+/// \p Flags is not used; \p Loc is only used for logging.
+EXTERN void __tgt_acc_set_device_type(ident_t *Loc, int64_t Flags,
+ int64_t DeviceType) {
+ FUNC_LOGGER(Loc);
+ DM->setDeviceType(static_cast<acc_device_t>(DeviceType));
+}
+
+/// Runtime entry point for the 'acc wait' directive.
+///
+/// Logs its arguments and then hands \p WaitNum / \p WaitList to
+/// accAsyncWait() for the device id returned by DM->getPMDeviceId().
+///
+/// NOTE(review): \p Flags, \p DeviceType, \p DeviceNum and \p Async are only
+/// logged here; none of them influences which device or queue is waited on.
+///
+/// \returns always 0.
+EXTERN int __tgt_acc_wait(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+                          int32_t DeviceNum, uint32_t WaitNum,
+                          int64_t *WaitList, int64_t Async) {
+  FUNC_LOGGER(Loc);
+
+  // Trace the request: scalar arguments first, then one line per wait entry.
+  ODBG(ADT_Interface) << Loc << "\n"
+                      << "DeviceNum: " << DeviceNum << ", "
+                      << "DeviceType: " << DeviceType << ", "
+                      << "WaitNum: " << WaitNum;
+  for (size_t Idx = 0; Idx != WaitNum; ++Idx) {
+    const int64_t Entry = WaitList[Idx];
+    ODBG(ADT_Interface) << "WaitList[" << Idx << "]: " << asyncToString(Entry);
+  }
+  ODBG(ADT_Interface) << "Async: " << asyncToString(Async)
+                      << " Flags: " << llvm::format_hex(Flags, 0);
+
+  const auto PMDeviceId = DM->getPMDeviceId();
+  accAsyncWait(Loc, PMDeviceId, WaitNum, WaitList);
+
+  return 0;
+}
+
+namespace {
+/// Taken by every entry point in this file that calls initAccRuntime() or
+/// deinitAccRuntime(); the two helpers themselves do not lock.
+static std::mutex InitMutex;
+/// Number of initAccRuntime() calls not yet matched by deinitAccRuntime().
+uint32_t InitRefCount = 0;
+
+/// Take one reference on the OpenACC runtime.
+///
+/// Every call initializes the underlying offload runtime and all RTLs and
+/// refreshes the device mapping; the first call additionally creates and
+/// initializes the global device and queue managers.
+static void initAccRuntime() {
+  FUNC_LOGGER();
+  initRuntime(/*OffloadEnabled=*/true);
+  // TODO Blindly register all rtls for now. In reality we should only be
+  // initializing the requested types in case we come from __tgt_acc_init(), or
+  // only the ones we have device code for.
+  __tgt_init_all_rtls();
+
+  InitRefCount++;
+  if (InitRefCount == 1) {
+    llvm::acc::target::DM = new llvm::acc::target::DeviceManagerTy();
+    llvm::acc::target::DM->init();
+
+    llvm::acc::target::QueueManager = new llvm::acc::target::QueueManagerTy();
+    llvm::acc::target::QueueManager->init();
+  }
+  llvm::acc::target::DM->refreshDeviceMapping(/*UpdateDeviceType=*/true);
+}
+
+/// Drop one reference on the OpenACC runtime.
+///
+/// Dropping the last reference tears down the queue manager and then the
+/// device manager. A call without a matching initAccRuntime() is reported and
+/// ignored instead of wrapping the unsigned counter around.
+static void deinitAccRuntime() {
+  FUNC_LOGGER();
+  if (InitRefCount == 0) {
+    REPORT_WARN() << "Unbalanced OpenACC runtime deinitialization is ignored.";
+    return;
+  }
+
+  if (InitRefCount == 1) {
+    // Queues are destroyed before the device manager.
+    llvm::acc::target::QueueManager->deinit();
+    delete llvm::acc::target::QueueManager;
+    llvm::acc::target::QueueManager = nullptr;
+
+    llvm::acc::target::DM->deinit();
+    delete llvm::acc::target::DM;
+    llvm::acc::target::DM = nullptr;
+  }
+  InitRefCount--;
+
+  deinitRuntime();
+}
+} // namespace
+
+/// Runtime entry point for the 'acc init' directive.
+///
+/// Takes one runtime reference under InitMutex and registers an exit handler
+/// that releases it again. \p Flags, \p DeviceType and \p DeviceNum are not
+/// consulted: a warning is emitted and all devices are initialized.
+///
+/// NOTE(review): each call registers one more std::atexit handler and the
+/// return value of std::atexit is not checked.
+EXTERN void __tgt_acc_init(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+                           int64_t DeviceNum) {
+  std::lock_guard<std::mutex> Guard(InitMutex);
+  FUNC_LOGGER(Loc);
+  REPORT_WARN() << "acc init ignores user's request and initializes all "
+                   "available devices.";
+  initAccRuntime();
+
+  // Release the reference taken above when the process exits.
+  auto ReleaseAtExit = []() {
+    std::lock_guard<std::mutex> ExitGuard(InitMutex);
+    FUNC_LOGGER();
+    deinitAccRuntime();
+  };
+  std::atexit(ReleaseAtExit);
+}
+
+/// Runtime entry point for the 'acc shutdown' directive. The request is not
+/// acted upon: the function only takes InitMutex, logs, and emits a warning.
+/// \p Flags, \p DeviceType and \p DeviceNum are unused.
+EXTERN void __tgt_acc_shutdown(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+ int64_t DeviceNum) {
+ std::scoped_lock<decltype(InitMutex)> Lock(InitMutex);
+ FUNC_LOGGER(Loc);
+ REPORT_WARN() << "acc shutdown is ignored.";
+}
+
+/// Add a target image to the runtime.
+///
+/// Takes one runtime reference, then either lets the plugin manager defer the
+/// registration (passing this function as the callback) or registers \p Desc
+/// immediately and refreshes the device mapping.
+EXTERN void __tgt_acc_register_lib(__tgt_bin_desc *Desc) {
+  std::lock_guard<std::mutex> Guard(InitMutex);
+  FUNC_LOGGER();
+  initAccRuntime();
+
+  const bool Deferred = PM->delayRegisterLib(__tgt_acc_register_lib, Desc);
+  if (!Deferred) {
+    PM->registerLib(Desc);
+    llvm::acc::target::DM->refreshDeviceMapping(/*UpdateDeviceType=*/true);
+  }
+}
+
+/// Remove a target image from the runtime: unregister \p Desc with the plugin
+/// manager and then drop one runtime reference, all under InitMutex.
+EXTERN void __tgt_acc_unregister_lib(__tgt_bin_desc *Desc) {
+  std::lock_guard<std::mutex> Guard(InitMutex);
+  FUNC_LOGGER();
+
+  // The image has to be unregistered while the runtime is still alive.
+  PM->unregisterLib(Desc);
+  deinitAccRuntime();
+}
diff --git a/offload/libacctarget/Interface.h b/offload/libacctarget/Interface.h
new file mode 100644
index 0000000000000..d65055d95f590
--- /dev/null
+++ b/offload/libacctarget/Interface.h
@@ -0,0 +1,270 @@
+//===- Interface.h ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __TGT_ACC_TARGET_H__
+#define __TGT_ACC_TARGET_H__
+
+#ifdef __cplusplus
+#include "flang-rt/runtime/descriptor.h"
+using namespace Fortran::ISO;
+#else
+#include "flang/ISO_Fortran_binding.h"
+#endif
+
+#include "Shared/APITypes.h"
+#include "Shared/SourceInfo.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+// Portable alignment attribute for C89/C99 compatibility
+#if defined(_MSC_VER)
+#define ACC_ALIGNED(x) __declspec(align(x))
+#elif defined(__GNUC__) || defined(__clang__)
+#define ACC_ALIGNED(x) __attribute__((aligned(x)))
+#else
+#define ACC_ALIGNED(x)
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//===----------------------------------------------------------------------===//
+// OpenACC Target Offload Types and Enums
+//===----------------------------------------------------------------------===//
+
+/// Map-type flags. Every non-zero enumerator is a distinct single bit, so
+/// values can be combined with bitwise OR. Bits 0x4, 0x20, 0x40, 0x200 and
+/// 0x1000 are not assigned here.
+/// NOTE(review): presumably these are the values carried in the `ArgTypes`
+/// arrays of the entry points declared in this header - confirm.
+enum {
+ // no flags
+ TGT_ACC_MAPTYPE_NONE = 0x0,
+ // copy data from host to device
+ TGT_ACC_MAPTYPE_TO = 0x1, // enter data
+ // copy data from device to host
+ TGT_ACC_MAPTYPE_FROM = 0x2, // exit data
+ // force unmapping of data
+ TGT_ACC_MAPTYPE_FINALIZE = 0x8,
+ // map the pointer as well as the pointee
+ TGT_ACC_MAPTYPE_PTR_AND_OBJ = 0x10,
+ // private variable - not mapped
+ TGT_ACC_MAPTYPE_PRIVATE = 0x80,
+ // copy by value - not mapped
+ TGT_ACC_MAPTYPE_LITERAL = 0x100,
+ // device pointer - already mapped
+ TGT_ACC_MAPTYPE_DEVPTR = 0x400,
+ // device pointer
+ // NOTE(review): how this differs from TGT_ACC_MAPTYPE_DEVPTR is not visible
+ // in this header - confirm and document.
+ TGT_ACC_MAPTYPE_MANAGED_DEVPTR = 0x800,
+ // present or don't create
+ TGT_ACC_MAPTYPE_NO_CREATE = 0x2000,
+ // private variable - gang
+ TGT_ACC_MAPTYPE_GANG_PRIVATE = 0x4000,
+ // private variable - worker
+ TGT_ACC_MAPTYPE_WORKER_PRIVATE = 0x8000,
+ // private variable - vector
+ TGT_ACC_MAPTYPE_VECTOR_PRIVATE = 0x10000,
+ // zero modifier
+ TGT_ACC_MAPTYPE_INIT_ZERO = 0x20000,
+ // device resident memory - persistent allocation
+ TGT_ACC_MAPTYPE_DEVICE_RESIDENT = 0x40000,
+ // present or not
+ TGT_ACC_MAPTYPE_IF_PRESENT = 0x80000,
+ // present clause: skip attach/detach to preserve user-managed pointers
+ TGT_ACC_MAPTYPE_PRESENT = 0x100000,
+};
+
+/// Array descriptor kinds. Each enumerator has a like-named AccDataDesc*
+/// struct in this header (Generic, F18, MemRef, OpenACC).
+/// NOTE(review): where the kind is stored is not visible in this header;
+/// presumably it is what selects the concrete descriptor layout - confirm.
+enum {
+ TGT_ACC_DESC_GENERIC = 0, // Generic type descriptor.
+ TGT_ACC_DESC_F18 = 1, // Fortran 2018 type descriptor.
+ TGT_ACC_DESC_MEMREF = 2, // MemRef type descriptor.
+ TGT_ACC_DESC_OPENACC = 0x1000 // OpenACC descriptor.
+};
+
+/// Device pointer type.
+typedef uintptr_t tgt_acc_devptr_t;
+
+/// Type descriptor base struct. It is the first member of every concrete
+/// AccDataDesc* struct below, so a pointer to a concrete descriptor can also
+/// be viewed as a pointer to this base.
+typedef struct {
+ // Version of the descriptor.
+ int32_t Version;
+} AccDataDesc;
+
+/// Generic type descriptor: carries nothing beyond the common base.
+typedef struct {
+ AccDataDesc Base;
+} AccDataDescGeneric;
+
+/// F18 type descriptor: the common base plus a pointer to a CFI_cdesc_t
+/// (the ISO_Fortran_binding descriptor type pulled in by the includes above).
+typedef struct {
+ AccDataDesc Base;
+ CFI_cdesc_t *FortranDescriptor;
+} AccDataDescF18;
+
+/// The structure defined by LLVMTypeConverter::getMemRefDescriptorFields.
+///
+/// Only the fixed prefix is declared. `sizes` is declared with one element as
+/// a placeholder for the start of the variable-length tail; code using this
+/// struct computes the addresses of the real arrays from it.
+typedef struct {
+ void *allocatedPtr;
+ void *alignedPtr;
+ uint64_t offset;
+
+ uint64_t sizes[1];
+// Below are the real fields in the struct where Rank is a compile-time
+// constant. We use offsets from the above sizes to obtain the addresses of
+// the sizes and strides arrays.
+// The block is compiled out (#if 0) and serves as documentation only.
+#if 0
+ uint64_t sizes[Rank];
+ uint64_t strides[Rank];
+#endif
+} MemRefDesc;
+
+/// MemRef type descriptor: the common base plus a pointer to a MemRefDesc.
+/// NOTE(review): `Rank` is presumably the Rank of the sizes/strides arrays of
+/// the pointed-to MemRefDesc, and `ElementSize` a size in bytes - confirm.
+/// Note that `ElementSize` is unsigned here but signed in AccDataDescOpenACC.
+typedef struct {
+ AccDataDesc Base;
+ unsigned char Rank;
+ uint64_t ElementSize;
+ MemRefDesc *MemRefDescriptor;
+} AccDataDescMemRef;
+
+/// OpenACC descriptor.
+///
+/// `Rank` carries ACC_ALIGNED(8); that macro expands to an alignment
+/// attribute on MSVC/GCC/Clang and to nothing on other compilers (see its
+/// definition above), so the layout is only pinned on the former.
+/// NOTE(review): the pointer members presumably each reference `Rank`
+/// entries, one per dimension - confirm.
+typedef struct {
+ AccDataDesc Base;
+ ACC_ALIGNED(8) unsigned char Rank;
+ int64_t ElementSize;
+ int64_t *LowerBounds;
+ int64_t *UpperBounds;
+ int64_t *Extents;
+ int64_t *StridesInBytes;
+ int64_t *StartIndices;
+} AccDataDescOpenACC;
+
+/// This struct contains all of the arguments to a target kernel region launch.
+/// It is passed by pointer to __tgt_acc_kernel. The per-argument arrays use
+/// the same names and element types as the array parameters of the
+/// data-mapping entry points declared below.
+typedef struct {
+ // Version of this struct for ABI compatibility.
+ uint32_t Version;
+ // Number of arguments in each input pointer.
+ uint32_t ArgNum;
+ // Base pointer of each argument (e.g. a struct).
+ void **ArgBasePtrs;
+ // Pointer to the argument data.
+ void **ArgPtrs;
+ // Size of the argument data in bytes.
+ int64_t *ArgSizes;
+ // Type of the data (e.g. to / from).
+ int64_t *ArgTypes;
+ // Name of the data for debugging, possibly null.
+ char **ArgNames;
+ // User-defined mappers (e.g. C++ copy ctors), possibly null.
+ void **ArgMappers;
+ // Type descriptors.
+ AccDataDesc **ArgDescs;
+ // Loop tripcount.
+ uint64_t Tripcount;
+ // Values of the num_gangs clause, in three dimensions.
+ int64_t NumGangs[3];
+ // Value of the num_workers clause.
+ int64_t NumWorkers;
+ // Value of the vector_length clause.
+ int64_t VectorLength;
+ // Size of shared memory.
+ // NOTE(review): unit is presumably bytes - confirm.
+ int64_t SmemSize;
+} AccKernelArgsTy;
+
+//===----------------------------------------------------------------------===//
+// OpenACC Target Offload Runtime Compiler Interface API
+//===----------------------------------------------------------------------===//
+
+/// adds a target shared library to the target execution image
+void __tgt_acc_register_lib(__tgt_bin_desc *Desc);
+
+/// removes a target shared library from the target execution image
+void __tgt_acc_unregister_lib(__tgt_bin_desc *Desc);
+
+/// 'acc init' directive
+void __tgt_acc_init(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+ int64_t DeviceNum);
+
+/// 'acc shutdown' directive
+void __tgt_acc_shutdown(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+ int64_t DeviceNum);
+
+/// 'acc declare' directive
+void __tgt_acc_declare(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+ uint32_t ArgNum, void **ArgBasePtrs, void **ArgPtrs,
+ int64_t *ArgSizes, int64_t *ArgTypes, char **ArgNames,
+ void **ArgMappers, AccDataDesc **ArgDescs, int64_t Async,
+ __tgt_bin_desc *Desc);
+
+/// 'acc enter data' directive
+void __tgt_acc_data_enter(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+ uint32_t ArgNum, void **ArgBasePtrs, void **ArgPtrs,
+ int64_t *ArgSizes, int64_t *ArgTypes, char **ArgNames,
+ void **ArgMappers, AccDataDesc **ArgDescs,
+ int64_t Async);
+
+/// 'acc exit data' directive
+void __tgt_acc_data_exit(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+ uint32_t ArgNum, void **ArgBasePtrs, void **ArgPtrs,
+ int64_t *ArgSizes, int64_t *ArgTypes, char **ArgNames,
+ void **ArgMappers, AccDataDesc **ArgDescs,
+ int64_t Async);
+
+/// 'acc update' directive
+void __tgt_acc_data_update(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+ uint32_t ArgNum, void **ArgBasePtrs, void **ArgPtrs,
+ int64_t *ArgSizes, int64_t *ArgTypes,
+ char **ArgNames, void **ArgMappers,
+ AccDataDesc **ArgDescs, int64_t Async);
+
+/// data mapping begin (for `acc data` construct or compute construct)
+void __tgt_acc_data_begin(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+ uint32_t ArgNum, void **ArgBasePtrs, void **ArgPtrs,
+ int64_t *ArgSizes, int64_t *ArgTypes, char **ArgNames,
+ void **ArgMappers, AccDataDesc **ArgDescs,
+ int64_t Async);
+
+/// data mapping end (for `acc data` construct or compute construct)
+void __tgt_acc_data_end(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+ uint32_t ArgNum, void **ArgBasePtrs, void **ArgPtrs,
+ int64_t *ArgSizes, int64_t *ArgTypes, char **ArgNames,
+ void **ArgMappers, AccDataDesc **ArgDescs,
+ int64_t Async);
+
+/// compute construct directive
+int __tgt_acc_kernel(ident_t *Loc, void *Kernel, int64_t Flags,
+ int64_t DeviceType, AccKernelArgsTy *Args, int64_t Async,
+ const char *KernelName, __tgt_bin_desc *Desc);
+
+/// 'acc wait' directive
+int __tgt_acc_wait(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+ int32_t DeviceNum, uint32_t WaitNum, int64_t *WaitList,
+ int64_t Async);
+
+/// `acc host_data use_device` directive
+void *__tgt_acc_get_deviceptr(ident_t *Loc, void *BasePtr, int64_t Flags,
+ void *HostPtr);
+
+/// 'acc set default_async' directive
+void __tgt_acc_set_default_async(ident_t *Loc, int64_t Async);
+
+/// 'acc set device_num' directive
+void __tgt_acc_set_device_num(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+ int64_t DeviceNum);
+
+/// 'acc set device_type' directive
+void __tgt_acc_set_device_type(ident_t *Loc, int64_t Flags, int64_t DeviceType);
+
+/// Mirror allocation for declare action recipes
+void __tgt_acc_mirror_alloc(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+ uint32_t ArgNum, void **ArgBasePtrs, void **ArgPtrs,
+ int64_t *ArgSizes, int64_t *ArgTypes,
+ char **ArgNames, void **ArgMappers,
+ AccDataDesc **ArgDescs);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __TGT_ACC_TARGET_H__
diff --git a/offload/libacctarget/Logger.h b/offload/libacctarget/Logger.h
new file mode 100644
index 0000000000000..7ffc9ae8e2bed
--- /dev/null
+++ b/offload/libacctarget/Logger.h
@@ -0,0 +1,47 @@
+//===- Logger.h -------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ACC_OFFLOAD_LOGGER_H_
+#define LLVM_ACC_OFFLOAD_LOGGER_H_
+
+#include "Shared/Debug.h"
+#include "Shared/SourceInfo.h"
+#include <optional>
+
+namespace llvm::acc::target::debug {
+/// Render \p Loc as "<filename>:<line>", as decoded by SourceInfo.
+inline std::string formatLoc(ident_t *Loc) {
+  SourceInfo SI(Loc);
+  return std::string(SI.getFilename()) + ":" + std::to_string(SI.getLine());
+}
+
+/// Stream \p Loc in the form produced by formatLoc().
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, ident_t *Loc) {
+  return OS << formatLoc(Loc);
+}
+
+/// Scope tracer: writes "> name" through ODBG() when constructed and
+/// "< name" when destroyed, with the source location appended when one was
+/// supplied. Normally instantiated through the FUNC_LOGGER macro.
+struct ScopeLoggerTy {
+  const char *ScopeName;
+  /// Unset when the logger was created without a location; a supplied
+  /// location is stored as-is, even if it is a null pointer.
+  std::optional<ident_t *> Loc = std::nullopt;
+
+  ScopeLoggerTy(const char *ScopeName, ident_t *Loc)
+      : ScopeName(ScopeName), Loc(Loc) {
+    ODBG() << "> " << ScopeName << "(" << Loc << ")";
+  }
+
+  explicit ScopeLoggerTy(const char *ScopeName) : ScopeName(ScopeName) {
+    ODBG() << "> " << ScopeName;
+  }
+
+  // A copy would emit a second, unmatched exit trace when it is destroyed.
+  ScopeLoggerTy(const ScopeLoggerTy &) = delete;
+  ScopeLoggerTy &operator=(const ScopeLoggerTy &) = delete;
+
+  ~ScopeLoggerTy() {
+    if (Loc)
+      ODBG() << "< " << ScopeName << "(" << *Loc << ")";
+    else
+      ODBG() << "< " << ScopeName;
+  }
+};
+} // namespace llvm::acc::target::debug
+
+/// Declare a scope tracer named `FunctionScopeLogger` for the enclosing
+/// function. An optional `ident_t *` argument adds the source location to the
+/// trace. The type is fully qualified so the macro does not depend on a
+/// `using namespace llvm::acc::target::debug` at the point of use.
+#define FUNC_LOGGER(...)                                                       \
+  ::llvm::acc::target::debug::ScopeLoggerTy FunctionScopeLogger(               \
+      __FUNCTION__, ##__VA_ARGS__)
+
+#endif // LLVM_ACC_OFFLOAD_LOGGER_H_
diff --git a/offload/libacctarget/Private.h b/offload/libacctarget/Private.h
new file mode 100644
index 0000000000000..c7d15aa23c189
--- /dev/null
+++ b/offload/libacctarget/Private.h
@@ -0,0 +1,29 @@
+//===- Private.h ------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _OPENACC_PRIVATE_H_
+#define _OPENACC_PRIVATE_H_
+
+#include <cstddef>
+#include <cstdint>
+
+namespace llvm::acc::target {
+int accIsPresent(void *);
+void *accAlloc(size_t);
+void accFree(void *);
+void accMemcpyFromDevice(void *, void *, size_t);
+void accMemcpyToDevice(void *, void *, size_t);
+void accMemcpyD2D(void *, void *, size_t, int, int);
+void accMapData(void *, void *, size_t);
+void accUnmapData(void *);
+
+void *accDataEnter(void *ArgBasePtr, void *ArgPtr, int64_t ArgSize,
+ int64_t ArgType, int64_t Async);
+} // namespace llvm::acc::target
+
+#endif // _OPENACC_PRIVATE_H_
diff --git a/offload/libacctarget/QueueManager.cpp b/offload/libacctarget/QueueManager.cpp
new file mode 100644
index 0000000000000..e9fa4a82cc559
--- /dev/null
+++ b/offload/libacctarget/QueueManager.cpp
@@ -0,0 +1,179 @@
+//===- QueueManager.cpp -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "QueueManager.h"
+#include "PluginManager.h"
+#include "Private.h"
+#include "Shared/Debug.h"
+
+namespace llvm::acc::target {
+QueueManagerTy *QueueManager = nullptr;
+} // namespace llvm::acc::target
+
+using namespace llvm::acc::target;
+
+/// Synchronize \p AsyncInfo, which backs queue \p Queue on \p Device, and
+/// raise FATAL_MESSAGE if the synchronization does not return
+/// OFFLOAD_SUCCESS. \p Queue is used for diagnostics only.
+static void synchronizeQueueOrDie(QueueIdTy Queue, DeviceTy &Device,
+                                  AsyncInfoTy *AsyncInfo) {
+
+  ODBG() << "Synchronizing stream " << AsyncInfo << " for device "
+         << Device.DeviceID << " " << &Device << " with queue ID " << Queue;
+  if (AsyncInfo->synchronize() == OFFLOAD_SUCCESS)
+    return;
+
+  // QueueIdTy is an unsigned 32-bit integer, so it is printed with PRIu32.
+  FATAL_MESSAGE(Device.DeviceID,
+                "Failed to synchronize queue %" PRIu32 " on device %d", Queue,
+                Device.DeviceID);
+}
+
+/// Query \p AsyncInfo, which backs queue \p Queue on \p Device, without
+/// waiting. Raises FATAL_MESSAGE when the query returns OFFLOAD_FAIL;
+/// otherwise the raw result is converted to QueueManagerTy::StatusTy.
+/// \p Queue is used for diagnostics only.
+static QueueManagerTy::StatusTy
+queryQueueOrDie(QueueIdTy Queue, DeviceTy &Device, AsyncInfoTy *AsyncInfo) {
+
+  ODBG() << "Querying stream " << AsyncInfo << " for device " << Device.DeviceID
+         << " " << &Device << " with queue ID " << Queue;
+  auto Res = AsyncInfo->query();
+  if (Res == OFFLOAD_FAIL)
+    // QueueIdTy is an unsigned 32-bit integer, so it is printed with PRIu32.
+    FATAL_MESSAGE(Device.DeviceID,
+                  "Failed to query queue %" PRIu32 " on device %d", Queue,
+                  Device.DeviceID);
+  // NOTE(review): relies on the non-failure results of query() having the
+  // same numeric values as StatusTy::READY / NOT_READY - confirm.
+  return static_cast<QueueManagerTy::StatusTy>(Res);
+}
+
+/// Nothing to set up; the queue map starts out empty.
+QueueManagerTy::QueueManagerTy() = default;
+
+/// The queue map owns its AsyncInfoTy objects and releases them itself.
+QueueManagerTy::~QueueManagerTy() = default;
+
+/// Synchronize every queue that has been created so far, on every device.
+/// NOTE(review): the map is walked without the mutex that get() takes.
+void QueueManagerTy::synchronize() {
+  for (auto &Entry : QueueMap) {
+    DeviceTy *Dev = Entry.first.first;
+    const QueueIdTy QueueId = Entry.first.second;
+    synchronizeQueueOrDie(QueueId, *Dev, Entry.second.get());
+  }
+}
+
+/// Synchronize every queue that has been created so far for \p Device.
+/// Devices are matched by address.
+void QueueManagerTy::synchronize(DeviceTy &Device) {
+  for (auto &Entry : QueueMap) {
+    const bool SameDevice = Entry.first.first == &Device;
+    if (SameDevice)
+      synchronizeQueueOrDie(Entry.first.second, Device, Entry.second.get());
+  }
+}
+
+/// Synchronize queue \p Queue of \p Device. The queue is created first if it
+/// does not exist yet, because the lookup goes through get().
+void QueueManagerTy::synchronize(DeviceTy &Device, QueueIdTy Queue) {
+  // Use this manager's own map rather than the global QueueManager pointer,
+  // so the member works on whichever instance it is invoked on.
+  AsyncInfoTy *AsyncInfo = get(Device, Queue);
+  synchronizeQueueOrDie(Queue, Device, AsyncInfo);
+}
+
+/// Report READY only if every queue created so far, on every device, reports
+/// READY. Stops at the first queue that is NOT_READY.
+QueueManagerTy::StatusTy QueueManagerTy::query() {
+  for (auto &Entry : QueueMap) {
+    const StatusTy Status = queryQueueOrDie(
+        Entry.first.second, *Entry.first.first, Entry.second.get());
+    if (Status == StatusTy::NOT_READY)
+      return StatusTy::NOT_READY;
+  }
+  return StatusTy::READY;
+}
+
+/// Report READY only if every queue created so far for \p Device reports
+/// READY. Devices are matched by address; stops at the first NOT_READY queue.
+QueueManagerTy::StatusTy QueueManagerTy::query(DeviceTy &Device) {
+  for (auto &Entry : QueueMap) {
+    DeviceTy *Dev = Entry.first.first;
+    if (Dev == &Device) {
+      const StatusTy Status =
+          queryQueueOrDie(Entry.first.second, *Dev, Entry.second.get());
+      if (Status == StatusTy::NOT_READY)
+        return StatusTy::NOT_READY;
+    }
+  }
+  return StatusTy::READY;
+}
+
+/// Query queue \p Queue of \p Device without waiting. The queue is created
+/// first if it does not exist yet, because the lookup goes through get().
+QueueManagerTy::StatusTy QueueManagerTy::query(DeviceTy &Device,
+                                               QueueIdTy Queue) {
+  // Use this manager's own map rather than the global QueueManager pointer,
+  // so the member works on whichever instance it is invoked on.
+  AsyncInfoTy *AsyncInfo = get(Device, Queue);
+  return queryQueueOrDie(Queue, Device, AsyncInfo);
+}
+
+/// Return the AsyncInfoTy backing queue \p QueueId on \p Device, creating it
+/// on first use. Lookup and creation happen under a function-local static
+/// mutex. The returned pointer stays owned by the queue map.
+AsyncInfoTy *QueueManagerTy::get(DeviceTy &Device, QueueIdTy QueueId) {
+  static std::mutex Mutex;
+  std::lock_guard<std::mutex> G(Mutex);
+
+  // try_emplace leaves an existing entry untouched and otherwise inserts an
+  // empty slot that is filled in below.
+  auto [Pos, IsNew] = QueueMap.try_emplace(std::make_pair(&Device, QueueId));
+  std::unique_ptr<AsyncInfoTy> &Slot = Pos->second;
+  if (IsNew) {
+    Slot = std::make_unique<AsyncInfoTy>(
+        Device, AsyncInfoTy::SyncTy::STATIC_NON_BLOCKING);
+    ODBG() << "Initialized new stream for device " << &Device << " id "
+           << QueueId << " -> " << Slot.get();
+  }
+  return Slot.get();
+}
+
+namespace llvm::acc::target {
+/// Look up plugin-manager device \p DeviceId, raising FATAL_MESSAGE with the
+/// lookup error when it cannot be obtained.
+static DeviceTy &getPMDeviceOrDie(int64_t DeviceId) {
+  auto DeviceOrErr = PM->getDevice(DeviceId);
+  if (!DeviceOrErr)
+    // The id is narrowed explicitly. NOTE(review): FATAL_MESSAGE presumably
+    // formats its first argument as an int - confirm against Shared/Debug.h.
+    FATAL_MESSAGE(static_cast<int>(DeviceId), "%s",
+                  toString(DeviceOrErr.takeError()).c_str());
+  return *DeviceOrErr;
+}
+
+/// Synchronize the single queue \p WaitArg on device \p DeviceId.
+void accAsyncWait(ident_t *Loc, int64_t DeviceId, int64_t WaitArg) {
+  int64_t WaitArgs[] = {WaitArg};
+  accAsyncWait(Loc, DeviceId, 1, WaitArgs);
+}
+
+/// Synchronize the \p WaitNum queues listed in \p WaitList on device
+/// \p DeviceId; an empty list synchronizes every existing queue of the
+/// device. \p Loc is not used.
+///
+/// NOTE(review): list entries are int64_t but are converted to the unsigned
+/// QueueIdTy when handed to the queue manager; negative entries are not
+/// treated specially here - confirm that callers resolve them beforehand.
+void accAsyncWait(ident_t *Loc, int64_t DeviceId, uint32_t WaitNum,
+                  int64_t *WaitList) {
+  ODBG() << "Synchronizing streams for device " << DeviceId;
+
+  DeviceTy &Device = getPMDeviceOrDie(DeviceId);
+  if (WaitNum == 0) {
+    QueueManager->synchronize(Device);
+    return;
+  }
+  for (uint32_t I = 0; I < WaitNum; I++)
+    QueueManager->synchronize(Device, WaitList[I]);
+}
+
+/// Synchronize every existing queue of device \p DeviceId. \p Loc is not used.
+void accAsyncWaitAll(ident_t *Loc, int64_t DeviceId) {
+  ODBG() << "Synchronizing all streams for device " << DeviceId;
+  QueueManager->synchronize(getPMDeviceOrDie(DeviceId));
+}
+
+/// Synchronize every existing queue on every device. \p Loc is not used.
+void accAsyncWaitAll(ident_t *Loc) {
+  ODBG() << "Synchronizing all streams";
+  QueueManager->synchronize();
+}
+
+/// Query the single queue \p TestArg on device \p DeviceId.
+/// \returns the QueueManagerTy::StatusTy value as an int.
+int accAsyncTest(ident_t *Loc, int64_t DeviceId, int64_t TestArg) {
+  int64_t TestList[] = {TestArg};
+  return accAsyncTest(Loc, DeviceId, 1, TestList);
+}
+
+/// Query the \p TestNum queues listed in \p TestList on device \p DeviceId.
+/// \returns NOT_READY (as int) at the first queue that is not ready, READY
+/// (as int) otherwise, including for an empty list. \p Loc is not used.
+int accAsyncTest(ident_t *Loc, int64_t DeviceId, uint32_t TestNum,
+                 int64_t *TestList) {
+  ODBG() << "Querying streams for device " << DeviceId;
+
+  DeviceTy &Device = getPMDeviceOrDie(DeviceId);
+  for (uint32_t I = 0; I < TestNum; I++)
+    if (QueueManager->query(Device, TestList[I]) ==
+        QueueManagerTy::StatusTy::NOT_READY)
+      return static_cast<int>(QueueManagerTy::StatusTy::NOT_READY);
+  return static_cast<int>(QueueManagerTy::StatusTy::READY);
+}
+
+/// Query every existing queue of device \p DeviceId. \p Loc is not used.
+/// \returns the QueueManagerTy::StatusTy value as an int.
+int accAsyncTestAll(ident_t *Loc, int64_t DeviceId) {
+  ODBG() << "Querying all streams for device " << DeviceId;
+  return static_cast<int>(QueueManager->query(getPMDeviceOrDie(DeviceId)));
+}
+
+/// Query every existing queue on every device. \p Loc is not used.
+/// \returns the QueueManagerTy::StatusTy value as an int.
+int accAsyncTestAll(ident_t *Loc) {
+  ODBG() << "Querying all streams";
+  return static_cast<int>(QueueManager->query());
+}
+} // namespace llvm::acc::target
diff --git a/offload/libacctarget/QueueManager.h b/offload/libacctarget/QueueManager.h
new file mode 100644
index 0000000000000..13d3ce96122ed
--- /dev/null
+++ b/offload/libacctarget/QueueManager.h
@@ -0,0 +1,77 @@
+//===- QueueManager.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _QUEUEMANAGER_H_
+#define _QUEUEMANAGER_H_
+
+#include "omptarget.h"
+
+namespace llvm::acc::target {
+
+using QueueIdTy = uint32_t;
+
+/// Owns one AsyncInfoTy per (device, queue id) pair and offers
+/// synchronize/query operations at three scopes: a single queue, all queues
+/// of one device, and all queues of all devices.
+class QueueManagerTy {
+public:
+ QueueManagerTy();
+ ~QueueManagerTy();
+
+ // Currently empty hooks, called right after construction and right before
+ // destruction by the runtime init/deinit code.
+ void init() {}
+ void deinit() {}
+
+ // Result of a non-blocking query.
+ enum class StatusTy { READY = 0, NOT_READY = 1 };
+
+ // Returns the AsyncInfoTy for the given queue, creating it on first use.
+ // The manager keeps ownership of the returned object.
+ AsyncInfoTy *get(DeviceTy &Device, QueueIdTy QueueId);
+
+ void synchronize(DeviceTy &Device, QueueIdTy Queue);
+ void synchronize(DeviceTy &Device);
+ void synchronize();
+
+ StatusTy query(DeviceTy &Device, QueueIdTy Queue);
+ StatusTy query(DeviceTy &Device);
+ StatusTy query();
+
+private:
+ // Keyed by device address and queue id.
+ std::map<std::pair<DeviceTy *, QueueIdTy>, std::unique_ptr<AsyncInfoTy>>
+ QueueMap;
+};
+
+extern QueueManagerTy *QueueManager;
+
+/// Non-owning handle to the AsyncInfoTy of one queue, obtained from the global
+/// QueueManager at construction. Converts implicitly to `AsyncInfoTy &` so it
+/// can be passed wherever such a reference is expected.
+class QueueAsyncInfoWrapperTy {
+  AsyncInfoTy *AsyncInfo;
+
+public:
+  QueueAsyncInfoWrapperTy(DeviceTy &Device, QueueIdTy QueueId)
+      : AsyncInfo(QueueManager->get(Device, QueueId)) {}
+
+  // Nothing to release: the queue manager owns the AsyncInfoTy.
+  ~QueueAsyncInfoWrapperTy() {}
+
+  operator AsyncInfoTy &() { return *AsyncInfo; }
+};
+
+/// Synchronize the single queue \p WaitArg on device \p DeviceId.
+void accAsyncWait(ident_t *Loc, int64_t DeviceId, int64_t WaitArg);
+/// Synchronize the \p WaitNum queues in \p WaitList on device \p DeviceId; an
+/// empty list synchronizes every existing queue of that device.
+void accAsyncWait(ident_t *Loc, int64_t DeviceId, uint32_t WaitNum,
+                  int64_t *WaitList);
+/// Synchronize every existing queue of device \p DeviceId.
+void accAsyncWaitAll(ident_t *Loc, int64_t DeviceId);
+/// Synchronize every existing queue on every device.
+void accAsyncWaitAll(ident_t *Loc);
+/// Query the single queue \p TestArg on device \p DeviceId. Returns a
+/// QueueManagerTy::StatusTy value converted to int.
+int accAsyncTest(ident_t *Loc, int64_t DeviceId, int64_t TestArg);
+/// Query the \p TestNum queues in \p TestList on device \p DeviceId. Returns a
+/// QueueManagerTy::StatusTy value converted to int.
+int accAsyncTest(ident_t *Loc, int64_t DeviceId, uint32_t TestNum,
+                 int64_t *TestList);
+/// Query every existing queue of device \p DeviceId.
+int accAsyncTestAll(ident_t *Loc, int64_t DeviceId);
+/// Query every existing queue on every device.
+int accAsyncTestAll(ident_t *Loc);
+} // namespace llvm::acc::target
+
+namespace llvm::acc::target::icv {
+// acc-default-async-var
+extern thread_local int32_t AccDefaultAsyncVar;
+} // namespace llvm::acc::target::icv
+
+#endif // _QUEUEMANAGER_H_
diff --git a/offload/libacctarget/RuntimeImpl.cpp b/offload/libacctarget/RuntimeImpl.cpp
new file mode 100644
index 0000000000000..fa645c29fa713
--- /dev/null
+++ b/offload/libacctarget/RuntimeImpl.cpp
@@ -0,0 +1,175 @@
+//===- RuntimeImpl.cpp ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Debug.h"
+#include "DeviceManager.h"
+#include "Logger.h"
+#include "PluginManager.h"
+#include "Private.h"
+#include "Shared/Debug.h"
+
+using namespace llvm::acc::target::debug;
+
+namespace llvm::acc::target {
+/// Test whether host address \p Ptr has a mapping on the device returned by
+/// DM->getDevice(). The lookup covers one byte and leaves reference counts
+/// untouched.
+/// \returns the lookup's isPresent() result converted to int.
+int accIsPresent(void *Ptr) {
+  FUNC_LOGGER();
+  ODBG(ADT_Interface) << "Address " << Ptr;
+
+  auto DeviceOrErr = DM->getDevice();
+  if (!DeviceOrErr)
+    REPORT_FATAL() << toString(DeviceOrErr.takeError()).c_str();
+
+  auto &&Mappings = DeviceOrErr->getMappingInfo();
+  TargetPointerResultTy Lookup =
+      Mappings.getTgtPtrBegin(Ptr, 1,
+                              /*UpdateRefCount=*/false,
+                              /*UseHoldRefCount=*/false);
+  const int Present = Lookup.isPresent();
+  ODBG(ADT_Interface) << "Result " << Present;
+  return Present;
+}
+
+/// Allocate \p Size bytes on the device returned by DM->getDevice().
+/// \returns the pointer produced by the device's allocData(), or null without
+/// touching the device when \p Size is zero.
+void *accAlloc(size_t Size) {
+  FUNC_LOGGER();
+  ODBG(ADT_Interface) << "Allocating " << Size << " bytes";
+
+  // Size is unsigned, so zero is the only "non-positive" value.
+  if (Size == 0) {
+    ODBG(ADT_Interface) << "Non-positive length";
+    return nullptr;
+  }
+
+  auto DeviceOrErr = DM->getDevice();
+  if (!DeviceOrErr)
+    REPORT_FATAL() << toString(DeviceOrErr.takeError()).c_str();
+
+  void *DevicePtr = DeviceOrErr->allocData(Size, nullptr);
+  ODBG(ADT_Interface) << "Device ptr " << DevicePtr;
+  return DevicePtr;
+}
+
+/// Release device memory at \p DevicePtr on the device returned by
+/// DM->getDevice(). A null pointer is accepted and ignored, matching the
+/// OpenACC `acc_free` contract; a failed deallocation is fatal.
+void accFree(void *DevicePtr) {
+  FUNC_LOGGER();
+  ODBG(ADT_Interface) << "Address " << DevicePtr;
+
+  // Freeing a null pointer is a no-op and must not reach the device.
+  if (!DevicePtr)
+    return;
+
+  auto DeviceOrErr = DM->getDevice();
+  if (!DeviceOrErr)
+    REPORT_FATAL() << toString(DeviceOrErr.takeError()).c_str();
+
+  if (DeviceOrErr->deleteData(DevicePtr) == OFFLOAD_FAIL)
+    REPORT_FATAL() << "Failed to deallocate device ptr. Set "
+                      "OFFLOAD_TRACK_ALLOCATION_TRACES=1 to track allocations.";
+}
+
+/// Copy \p Bytes bytes from host address \p Src to device address \p Dst on
+/// the device returned by DM->getDevice().
+///
+/// A zero-length copy is a silent no-op; a null \p Dst or \p Src is reported
+/// and ignored. A failed submission is reported instead of only appearing in
+/// the debug log.
+void accMemcpyToDevice(void *Dst, void *Src, size_t Bytes) {
+  FUNC_LOGGER();
+  ODBG(ADT_Interface) << Dst << " <- " << Src << ", " << Bytes << " bytes";
+
+  // Bytes is unsigned, so zero is the only "empty" value.
+  if (Bytes == 0) {
+    ODBG(ADT_Interface) << "Zero bytes, nothing to do";
+    return;
+  }
+  if (!Dst || !Src) {
+    REPORT() << "Invalid arguments";
+    return;
+  }
+
+  auto DeviceOrErr = DM->getDevice();
+  if (!DeviceOrErr)
+    REPORT_FATAL() << toString(DeviceOrErr.takeError()).c_str();
+  AsyncInfoTy AsyncInfo(*DeviceOrErr);
+  int Rc = DeviceOrErr->submitData(Dst, Src, Bytes, AsyncInfo);
+  ODBG(ADT_Interface) << "Result " << Rc;
+  if (Rc != OFFLOAD_SUCCESS)
+    REPORT() << "Failed to copy " << Bytes << " bytes to the device";
+}
+
+/// Copy \p Bytes bytes from device address \p Src to host address \p Dst on
+/// the device returned by DM->getDevice().
+///
+/// A zero-length copy is a silent no-op; a null \p Dst or \p Src is reported
+/// and ignored. A failed retrieval is reported instead of only appearing in
+/// the debug log.
+void accMemcpyFromDevice(void *Dst, void *Src, size_t Bytes) {
+  FUNC_LOGGER();
+  ODBG(ADT_Interface) << Dst << " <- " << Src << ", " << Bytes << " bytes";
+
+  // Bytes is unsigned, so zero is the only "empty" value.
+  if (Bytes == 0) {
+    ODBG(ADT_Interface) << "Zero bytes, nothing to do";
+    return;
+  }
+  if (!Dst || !Src) {
+    REPORT() << "Invalid arguments";
+    return;
+  }
+
+  auto DeviceOrErr = DM->getDevice();
+  if (!DeviceOrErr)
+    REPORT_FATAL() << toString(DeviceOrErr.takeError()).c_str();
+  AsyncInfoTy AsyncInfo(*DeviceOrErr);
+  int Rc = DeviceOrErr->retrieveData(Dst, Src, Bytes, AsyncInfo);
+  ODBG(ADT_Interface) << "Result " << Rc;
+  if (Rc != OFFLOAD_SUCCESS)
+    REPORT() << "Failed to copy " << Bytes << " bytes from the device";
+}
+
+/// Copy \p Bytes bytes from device address \p Src to device address \p Dst.
+///
+/// A zero-length copy is a silent no-op; a null \p Dst or \p Src is reported
+/// and ignored, as is a device pair that cannot exchange data. A failed
+/// exchange is reported instead of only appearing in the debug log.
+///
+/// NOTE(review): \p DstDevice and \p SrcDevice are not consulted. Both ends
+/// of the copy are resolved through DM->getDevice(), i.e. the same lookup is
+/// performed twice. Selecting the devices by number needs a device-manager
+/// lookup that is not visible from this file - confirm and fix.
+void accMemcpyD2D(void *Dst, void *Src, size_t Bytes, int DstDevice,
+                  int SrcDevice) {
+  FUNC_LOGGER();
+  ODBG(ADT_Interface) << Dst << " <- " << Src << ", " << Bytes << " bytes";
+
+  // Bytes is unsigned, so zero is the only "empty" value.
+  if (Bytes == 0) {
+    ODBG(ADT_Interface) << "Zero bytes, nothing to do";
+    return;
+  }
+  if (!Dst || !Src) {
+    REPORT() << "Invalid arguments";
+    return;
+  }
+
+  auto DstDeviceOrErr = DM->getDevice();
+  if (!DstDeviceOrErr)
+    REPORT_FATAL() << toString(DstDeviceOrErr.takeError()).c_str();
+  auto SrcDeviceOrErr = DM->getDevice();
+  if (!SrcDeviceOrErr)
+    REPORT_FATAL() << toString(SrcDeviceOrErr.takeError()).c_str();
+  if (!SrcDeviceOrErr->isDataExchangable(*DstDeviceOrErr)) {
+    REPORT() << "D2D not allowed for current device type";
+    return;
+  }
+
+  AsyncInfoTy AsyncInfo(*SrcDeviceOrErr);
+  int Rc =
+      SrcDeviceOrErr->dataExchange(Src, *DstDeviceOrErr, Dst, Bytes, AsyncInfo);
+  ODBG(ADT_Interface) << "Result " << Rc;
+  if (Rc != OFFLOAD_SUCCESS)
+    REPORT() << "Failed to copy " << Bytes << " bytes between devices";
+}
+
+/// Associate host address \p Hst with already-allocated device address
+/// \p Dev for \p Bytes bytes on the device returned by DM->getDevice().
+///
+/// Null pointers or a zero length are reported and ignored. A failed
+/// association is reported instead of only appearing in the debug log.
+void accMapData(void *Hst, void *Dev, size_t Bytes) {
+  FUNC_LOGGER();
+  ODBG(ADT_Interface) << Hst << " <-> " << Dev << ", " << Bytes << " bytes";
+
+  // Bytes is unsigned, so zero is the only invalid length.
+  if (!Hst || !Dev || Bytes == 0) {
+    REPORT() << "Invalid arguments";
+    return;
+  }
+
+  auto DeviceOrErr = DM->getDevice();
+  if (!DeviceOrErr)
+    REPORT_FATAL() << toString(DeviceOrErr.takeError()).c_str();
+
+  int Rc = DeviceOrErr->getMappingInfo().associatePtr(Hst, Dev, Bytes);
+  ODBG(ADT_Interface) << "Result " << Rc;
+  if (Rc != OFFLOAD_SUCCESS)
+    REPORT() << "Failed to map host pointer " << Hst << " to device pointer "
+             << Dev;
+}
+
+/// Remove the association of host address \p Hst on the device returned by
+/// DM->getDevice().
+///
+/// A null pointer is reported and ignored. A failed disassociation is
+/// reported instead of only appearing in the debug log.
+void accUnmapData(void *Hst) {
+  FUNC_LOGGER();
+  ODBG(ADT_Interface) << Hst;
+
+  if (!Hst) {
+    REPORT() << "Invalid arguments";
+    return;
+  }
+
+  auto DeviceOrErr = DM->getDevice();
+  if (!DeviceOrErr)
+    REPORT_FATAL() << toString(DeviceOrErr.takeError()).c_str();
+
+  int Rc = DeviceOrErr->getMappingInfo().disassociatePtr(Hst);
+  ODBG(ADT_Interface) << "Result " << Rc;
+  if (Rc != OFFLOAD_SUCCESS)
+    REPORT() << "Failed to unmap host pointer " << Hst;
+}
+
+} // namespace llvm::acc::target
diff --git a/offload/libacctarget/RuntimeInterface.cpp b/offload/libacctarget/RuntimeInterface.cpp
new file mode 100644
index 0000000000000..bb379f0adcf24
--- /dev/null
+++ b/offload/libacctarget/RuntimeInterface.cpp
@@ -0,0 +1,248 @@
+//===- RuntimeInterface.cpp -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DeviceManager.h"
+#include "Interface.h"
+#include "Private.h"
+#include "QueueManager.h"
+#include "Shared/Debug.h"
+#include "include/openacc.h"
+
+using namespace llvm::acc::target;
+
+extern "C" {
+int acc_get_num_devices(acc_device_t DevType) {
+ return DM->getNumDevices(DevType);
+}
+int acc_get_num_devices_(acc_device_t *DevType) {
+ return acc_get_num_devices(*DevType);
+}
+
+int acc_get_device_num(acc_device_t DevType) {
+ return DM->getDeviceId(DevType);
+}
+int acc_get_device_num_(acc_device_t *DevType) {
+ return acc_get_device_num(*DevType);
+}
+
+void acc_set_device_num(int DevNum, acc_device_t DevType) {
+ __tgt_acc_set_device_num(nullptr, 0, DevType, DevNum);
+}
+void acc_set_device_num_(int *DevNum, acc_device_t *DevType) {
+ acc_set_device_num(*DevNum, *DevType);
+}
+
+// Forwards the requested device type to __tgt_acc_set_device_type().
+void acc_set_device_type(acc_device_t DevType) {
+  __tgt_acc_set_device_type(nullptr, 0, DevType);
+}
+void acc_set_device_type_(acc_device_t *DevType) {
+  acc_set_device_type(*DevType);
+}
+
+// acc_set_device is a second name for acc_set_device_type.
+void acc_set_device(acc_device_t DevType) { acc_set_device_type(DevType); }
+void acc_set_device_(acc_device_t *DevType) { acc_set_device(*DevType); }
+
+acc_device_t acc_get_device_type(void) { return DM->getDeviceType(); }
+acc_device_t acc_get_device_type_(void) { return acc_get_device_type(); }
+
+acc_device_t acc_get_device(void) { return DM->getDeviceType(); }
+acc_device_t acc_get_device_(void) { return acc_get_device(); }
+
+size_t acc_get_property(int DevNum, acc_device_t DevType,
+ acc_device_property_t Prop) {
+ return DM->getDeviceProperty(DevNum, DevType, Prop);
+}
+size_t acc_get_property_(int *DevNum, acc_device_t *DevType,
+ acc_device_property_t *Prop) {
+ return acc_get_property(*DevNum, *DevType, *Prop);
+}
+
+const char *acc_get_property_string(int DevNum, acc_device_t DevType,
+ acc_device_property_t Prop) {
+ return DM->getDevicePropertyString(DevNum, DevType, Prop);
+}
+const char *acc_get_property_string_(int *DevNum, acc_device_t *DevType,
+ acc_device_property_t *Prop) {
+ return acc_get_property_string(*DevNum, *DevType, *Prop);
+}
+
+// Hands WaitArg to accAsyncWait() for the device id DM->getPMDeviceId().
+void acc_wait(int WaitArg) {
+  accAsyncWait(nullptr, DM->getPMDeviceId(), WaitArg);
+}
+void acc_wait_(int *WaitArg) { acc_wait(*WaitArg); }
+
+// acc_async_wait is a second name for acc_wait.
+void acc_async_wait(int WaitArg) { acc_wait(WaitArg); }
+void acc_async_wait_(int *WaitArg) { acc_async_wait(*WaitArg); }
+
+// Currently identical to acc_wait as well.
+// NOTE(review): OpenACC declares acc_wait_async(wait_arg, async_arg); confirm.
+void acc_wait_async(int WaitArg) { acc_wait(WaitArg); }
+void acc_wait_async_(int *WaitArg) { acc_wait_async(*WaitArg); }
+
+void acc_wait_device(int WaitArg, int DevNum) {
+ accAsyncWait(nullptr, DevNum, WaitArg);
+}
+void acc_wait_device_(int *WaitArg, int *DevNum) {
+ acc_wait_device(*WaitArg, *DevNum);
+}
+
+void acc_wait_all_async() { accAsyncWaitAll(nullptr); }
+void acc_wait_all_async_() { acc_wait_all_async(); }
+
+void acc_async_wait_all() { accAsyncWaitAll(nullptr); }
+void acc_async_wait_all_() { acc_async_wait_all(); }
+
+void acc_wait_all() { accAsyncWaitAll(nullptr); }
+void acc_wait_all_() { acc_wait_all(); }
+
+void acc_wait_all_device(int DevNum) { accAsyncWaitAll(nullptr, DevNum); }
+void acc_wait_all_device_(int *DevNum) { acc_wait_all_device(*DevNum); }
+
+int acc_wait_any(int Count, int *WaitNum) {
+ REPORT_FATAL() << "acc_wait_any not yet implemented.";
+ return 0;
+}
+int acc_wait_any_(int *Count, int **WaitNum) {
+ return acc_wait_any(*Count, *WaitNum);
+}
+
+int acc_wait_any_device(int Count, int *WaitNum, int DevNum) {
+ REPORT_FATAL() << "acc_wait_any_device not yet implemented.";
+ return 0;
+}
+int acc_wait_any_device_(int *Count, int **WaitNum, int *DevNum) {
+ return acc_wait_any_device(*Count, *WaitNum, *DevNum);
+}
+
+void acc_set_default_async(int Async) {
+ __tgt_acc_set_default_async(nullptr, Async);
+}
+void acc_set_default_async_(int *Async) { acc_set_default_async(*Async); }
+
+int acc_get_default_async(void) { return icv::AccDefaultAsyncVar; }
+int acc_get_default_async_(void) { return acc_get_default_async(); }
+
+int acc_async_test(int TestArg) {
+  return !accAsyncTest(nullptr, DM->getPMDeviceId(), TestArg);
+}
+int acc_async_test_(int *WaitArg) { return acc_async_test(*WaitArg); }
+
+int acc_async_test_device(int DevNum, int WaitArg) {
+ return !accAsyncTest(nullptr, DevNum, WaitArg);
+}
+int acc_async_test_device_(int *DevNum, int *WaitArg) {
+ return acc_async_test_device(*DevNum, *WaitArg);
+}
+
+int acc_async_test_all(void) { return !accAsyncTestAll(nullptr); }
+int acc_async_test_all_(void) { return acc_async_test_all(); }
+
+int acc_async_test_all_device(int DevNum) {
+ return !accAsyncTestAll(nullptr, DevNum);
+}
+int acc_async_test_all_device_(int *DevNum) {
+ return acc_async_test_all_device(*DevNum);
+}
+
+void acc_init(acc_device_t DevType) { __tgt_acc_init(nullptr, 0, DevType, -1); }
+void acc_init_(acc_device_t *DevType) { acc_init(*DevType); }
+
+void acc_init_device(int DevNum, acc_device_t DevType) {
+ __tgt_acc_init(nullptr, 0, DevType, DevNum);
+}
+void acc_init_device_(int *DevNum, acc_device_t *DevType) {
+ acc_init_device(*DevNum, *DevType);
+}
+
+void acc_shutdown(acc_device_t DevType) {
+ __tgt_acc_shutdown(nullptr, 0, DevType, -1);
+}
+void acc_shutdown_(acc_device_t *DevType) { acc_shutdown(*DevType); }
+
+void acc_shutdown_device(int DevNum, acc_device_t DevType) {
+ __tgt_acc_shutdown(nullptr, 0, DevType, DevNum);
+}
+void acc_shutdown_device_(int *DevNum, acc_device_t *DevType) {
+ acc_shutdown_device(*DevNum, *DevType);
+}
+
+void acc_free(void *DataDev) { accFree(DataDev); }
+void acc_free_(void **DataDev) { acc_free(*DataDev); }
+
+void *acc_malloc(size_t Bytes) { return accAlloc(Bytes); }
+void *acc_malloc_(size_t *Bytes) { return acc_malloc(*Bytes); }
+
+void acc_map_data(void *DataArg, void *DataDev, size_t Bytes) {
+ accMapData(DataArg, DataDev, Bytes);
+}
+void acc_map_data_(void **DataArg, void **DataDev, size_t *Bytes) {
+ acc_map_data(*DataArg, *DataDev, *Bytes);
+}
+
+void acc_unmap_data(void *DataArg) { accUnmapData(DataArg); }
+void acc_unmap_data_(void **DataArg) { acc_unmap_data(*DataArg); }
+
+void *acc_deviceptr(void *DataArg) {
+ return __tgt_acc_get_deviceptr(nullptr, DataArg, 0, DataArg);
+}
+void *acc_deviceptr_(void **DataArg) { return acc_deviceptr(*DataArg); }
+
+void *acc_hostptr(void *DataDev) {
+ REPORT_FATAL() << "acc_hostptr not yet implemented";
+ return nullptr;
+}
+void *acc_hostptr_(void **DataDev) { return acc_hostptr(*DataDev); }
+
+void acc_memcpy_from_device(void *DataHostDest, void *DataDevSrc,
+ size_t Bytes) {
+ accMemcpyFromDevice(DataHostDest, DataDevSrc, Bytes);
+}
+void acc_memcpy_from_device_(void **DataHostDest, void **DataDevSrc,
+ size_t *Bytes) {
+ acc_memcpy_from_device(*DataHostDest, *DataDevSrc, *Bytes);
+}
+
+void acc_memcpy_to_device(void *DataDevDest, void *DataHostSrc, size_t Bytes) {
+ accMemcpyToDevice(DataDevDest, DataHostSrc, Bytes);
+}
+void acc_memcpy_to_device_(void **DataDevDest, void **DataHostSrc,
+ size_t *Bytes) {
+ acc_memcpy_to_device(*DataDevDest, *DataHostSrc, *Bytes);
+}
+
+void acc_memcpy_d2d(void *DataDevDest, void *DataHostSrc, size_t Bytes,
+ int DevNumDest, int DevNumSrc) {
+ accMemcpyD2D(DataDevDest, DataHostSrc, Bytes, DevNumDest, DevNumSrc);
+}
+void acc_memcpy_d2d_(void **DataDevDest, void **DataHostSrc, size_t *Bytes,
+ int *DevNumDest, int *DevNumSrc) {
+ acc_memcpy_d2d(*DataDevDest, *DataHostSrc, *Bytes, *DevNumDest, *DevNumSrc);
+}
+
+int acc_on_device(acc_device_t DevType) { return DevType == acc_device_host; }
+int acc_on_device_(acc_device_t *DevType) { return acc_on_device(*DevType); }
+
+void acc_present_dump_all() {
+ REPORT_WARN() << "acc_present_dump_all not yet implemented";
+}
+void acc_present_dump_all_() { acc_present_dump_all(); }
+
+void acc_attach_dump_all() {
+ REPORT_WARN() << "acc_attach_dump_all not yet implemented";
+}
+void acc_attach_dump_all_() { acc_attach_dump_all(); }
+
+void acc_attach_dump() {
+ REPORT_WARN() << "acc_attach_dump not yet implemented";
+}
+void acc_attach_dump_() { acc_attach_dump(); }
+}
diff --git a/offload/libacctarget/exports b/offload/libacctarget/exports
new file mode 100644
index 0000000000000..4ce05c2542164
--- /dev/null
+++ b/offload/libacctarget/exports
@@ -0,0 +1,182 @@
+VERS1.0 {
+ global:
+ __tgt_acc_register_lib;
+ __tgt_acc_unregister_lib;
+
+ acc_is_present;
+ _cfi_acc_is_present_a;
+
+ acc_create;
+ acc_pcreate;
+ acc_present_or_create;
+ acc_delete;
+ acc_delete_finalize;
+ acc_copyin;
+ acc_pcopyin;
+ acc_present_or_copyin;
+ acc_copyout;
+ acc_copyout_finalize;
+ acc_update_device;
+ acc_updatein;
+ acc_update_self;
+ acc_update_host;
+ acc_updateout;
+
+ acc_create_async;
+ acc_pcreate_async;
+ acc_present_or_create_async;
+ acc_delete_async;
+ acc_delete_finalize_async;
+ acc_copyin_async;
+ acc_pcopyin_async;
+ acc_present_or_copyin_async;
+ acc_copyout_async;
+ acc_copyout_finalize_async;
+ acc_update_device_async;
+ acc_updatein_async;
+ acc_update_self_async;
+ acc_update_host_async;
+ acc_updateout_async;
+
+ _cfi_acc_create_a;
+ _cfi_acc_pcreate_a;
+ _cfi_acc_present_or_create_a;
+ _cfi_acc_delete_a;
+ _cfi_acc_delete_finalize_a;
+ _cfi_acc_copyin_a;
+ _cfi_acc_pcopyin_a;
+ _cfi_acc_present_or_copyin_a;
+ _cfi_acc_copyout_a;
+ _cfi_acc_copyout_finalize_a;
+ _cfi_acc_update_device_a;
+ _cfi_acc_updatein_a;
+ _cfi_acc_update_self_a;
+ _cfi_acc_update_host_a;
+ _cfi_acc_updateout_a;
+
+ _cfi_acc_create_async_a;
+ _cfi_acc_pcreate_async_a;
+ _cfi_acc_present_or_create_async_a;
+ _cfi_acc_delete_async_a;
+ _cfi_acc_delete_finalize_async_a;
+ _cfi_acc_copyin_async_a;
+ _cfi_acc_pcopyin_async_a;
+ _cfi_acc_present_or_copyin_async_a;
+ _cfi_acc_copyout_async_a;
+ _cfi_acc_copyout_finalize_async_a;
+ _cfi_acc_update_device_async_a;
+ _cfi_acc_updatein_async_a;
+ _cfi_acc_update_self_async_a;
+ _cfi_acc_update_host_async_a;
+ _cfi_acc_updateout_async_a;
+
+ __tgt_acc_declare;
+ __tgt_acc_data_update;
+ __tgt_acc_data_enter;
+ __tgt_acc_data_exit;
+ __tgt_acc_data_begin;
+ __tgt_acc_data_end;
+ __tgt_acc_kernel;
+ __tgt_acc_get_deviceptr;
+ __tgt_acc_set_default_async;
+ __tgt_acc_set_device_num;
+ __tgt_acc_set_device_type;
+ __tgt_acc_wait;
+ __tgt_acc_init;
+ __tgt_acc_shutdown;
+
+ acc_get_num_devices;
+ acc_get_num_devices_;
+ acc_get_device_num;
+ acc_get_device_num_;
+ acc_set_device_num;
+ acc_set_device_num_;
+ acc_set_device_type;
+ acc_set_device_type_;
+ acc_set_device;
+ acc_set_device_;
+ acc_get_device_type;
+ acc_get_device_type_;
+ acc_get_device;
+ acc_get_device_;
+ acc_get_property;
+ acc_get_property_;
+ acc_get_property_string;
+ acc_get_property_string_;
+
+ acc_async_wait;
+ acc_async_wait_;
+ acc_wait_async;
+ acc_wait_async_;
+ acc_wait;
+ acc_wait_;
+ acc_wait_device;
+ acc_wait_device_;
+ acc_wait_all_async;
+ acc_wait_all_async_;
+ acc_async_wait_all;
+ acc_async_wait_all_;
+ acc_wait_all;
+ acc_wait_all_;
+ acc_wait_all_device;
+ acc_wait_all_device_;
+ acc_wait_any;
+ acc_wait_any_;
+ acc_wait_any_device;
+ acc_wait_any_device_;
+
+ acc_async_test;
+ acc_async_test_;
+ acc_async_test_device;
+ acc_async_test_device_;
+ acc_async_test_all;
+ acc_async_test_all_;
+ acc_async_test_all_device;
+ acc_async_test_all_device_;
+
+ acc_init;
+ acc_init_;
+ acc_init_device;
+ acc_init_device_;
+ acc_shutdown;
+ acc_shutdown_;
+ acc_shutdown_device;
+ acc_shutdown_device_;
+
+ acc_set_default_async;
+ acc_set_default_async_;
+ acc_get_default_async;
+ acc_get_default_async_;
+
+ acc_malloc;
+ acc_malloc_;
+ acc_free;
+ acc_free_;
+ acc_map_data;
+ acc_map_data_;
+ acc_unmap_data;
+ acc_unmap_data_;
+ acc_deviceptr;
+ acc_deviceptr_;
+ acc_hostptr;
+ acc_hostptr_;
+
+ acc_memcpy_from_device;
+ acc_memcpy_from_device_;
+ acc_memcpy_to_device;
+ acc_memcpy_to_device_;
+ acc_memcpy_d2d;
+ acc_memcpy_d2d_;
+
+ acc_on_device;
+ acc_on_device_;
+ acc_present_dump_all;
+ acc_present_dump_all_;
+ acc_attach_dump_all;
+ acc_attach_dump_all_;
+ acc_attach_dump;
+ acc_attach_dump_;
+
+ local:
+ *;
+};
diff --git a/offload/libacctarget/include/openacc.h b/offload/libacctarget/include/openacc.h
new file mode 100644
index 0000000000000..8f77d220a8d8e
--- /dev/null
+++ b/offload/libacctarget/include/openacc.h
@@ -0,0 +1,46 @@
+//===- openacc.h ------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ACC_OFFLOAD_INCLUDE_OPENACC_H_
+#define LLVM_ACC_OFFLOAD_INCLUDE_OPENACC_H_
+
+// OpenACC async-argument constants; parenthesized to expand as one operand.
+#define acc_async_sync (-1)
+#define acc_async_default (-3)
+#define acc_async_noval (-4)
+
+typedef enum {
+  acc_device_none = 0,
+  acc_device_default = 1,
+  acc_device_host = 2,
+  acc_device_not_host = 3,
+  // Range [4, 7) delimited by acc_device_concrete_type_begin/_end.
+  acc_device_concrete_type_begin = 4,
+  acc_device_nvidia = 4,
+  acc_device_amd = 5,
+  acc_device_spirv = 6,
+  acc_device_concrete_type_end = 7,
+  acc_device_current = 10,
+} acc_device_t;
+
+typedef enum {
+  // Range [0, 3) delimited by acc_property_int_begin/_end.
+  acc_property_int_begin = 0,
+  acc_property_memory = 0,
+  acc_property_free_memory = 1,
+  acc_property_shared_memory_support = 2,
+  acc_property_int_end = 3,
+  // Range [1000, 1003) delimited by acc_property_string_begin/_end.
+  acc_property_string_begin = 1000,
+  acc_property_name = 1000,
+  acc_property_vendor = 1001,
+  acc_property_driver = 1002,
+  acc_property_string_end = 1003,
+} acc_device_property_t;
+
+#endif // LLVM_ACC_OFFLOAD_INCLUDE_OPENACC_H_
More information about the llvm-branch-commits
mailing list