[llvm-branch-commits] [llvm] [offload] Add `libacctarget` OpenACC runtime (PR #198103)

Ivan R. Ivanov via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sat May 16 08:13:29 PDT 2026


https://github.com/ivanradanov created https://github.com/llvm/llvm-project/pull/198103

The implementation is subject to change.

---

<sub>Stack created with <a href="https://github.com/github/gh-stack">GitHub Stacks CLI</a> • <a href="https://gh.io/stacks-feedback">Give Feedback 💬</a></sub>

>From 080b7442260fd2e80cd8c1b71e278863d1807f39 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <iivanov at nvidia.com>
Date: Sat, 16 May 2026 07:08:01 -0700
Subject: [PATCH] [offload] Add `libacctarget` OpenACC runtime

The implementation is subject to change.
---
 offload/CMakeLists.txt                        |    1 +
 offload/libacctarget/CMakeLists.txt           |   75 +
 .../libacctarget/CfiDataRuntimeInterface.cpp  |  297 +++
 offload/libacctarget/DataRuntimeInterface.cpp |  204 ++
 offload/libacctarget/Debug.h                  |   24 +
 offload/libacctarget/DeviceManager.cpp        |  231 ++
 offload/libacctarget/DeviceManager.h          |   75 +
 offload/libacctarget/Interface.cpp            | 2043 +++++++++++++++++
 offload/libacctarget/Interface.h              |  270 +++
 offload/libacctarget/Logger.h                 |   47 +
 offload/libacctarget/Private.h                |   29 +
 offload/libacctarget/QueueManager.cpp         |  179 ++
 offload/libacctarget/QueueManager.h           |   77 +
 offload/libacctarget/RuntimeImpl.cpp          |  175 ++
 offload/libacctarget/RuntimeInterface.cpp     |  248 ++
 offload/libacctarget/exports                  |  182 ++
 offload/libacctarget/include/openacc.h        |   46 +
 17 files changed, 4203 insertions(+)
 create mode 100644 offload/libacctarget/CMakeLists.txt
 create mode 100644 offload/libacctarget/CfiDataRuntimeInterface.cpp
 create mode 100644 offload/libacctarget/DataRuntimeInterface.cpp
 create mode 100644 offload/libacctarget/Debug.h
 create mode 100644 offload/libacctarget/DeviceManager.cpp
 create mode 100644 offload/libacctarget/DeviceManager.h
 create mode 100644 offload/libacctarget/Interface.cpp
 create mode 100644 offload/libacctarget/Interface.h
 create mode 100644 offload/libacctarget/Logger.h
 create mode 100644 offload/libacctarget/Private.h
 create mode 100644 offload/libacctarget/QueueManager.cpp
 create mode 100644 offload/libacctarget/QueueManager.h
 create mode 100644 offload/libacctarget/RuntimeImpl.cpp
 create mode 100644 offload/libacctarget/RuntimeInterface.cpp
 create mode 100644 offload/libacctarget/exports
 create mode 100644 offload/libacctarget/include/openacc.h

diff --git a/offload/CMakeLists.txt b/offload/CMakeLists.txt
index e63e6bdfc03e3..bcf03b39bc7ca 100644
--- a/offload/CMakeLists.txt
+++ b/offload/CMakeLists.txt
@@ -299,6 +299,7 @@ add_subdirectory(docs)
 # Build target agnostic offloading library.
 add_subdirectory(libompaccsupport)
 add_subdirectory(libomptarget)
+add_subdirectory(libacctarget)
 add_subdirectory(liboffload)
 
 # Add tests.
diff --git a/offload/libacctarget/CMakeLists.txt b/offload/libacctarget/CMakeLists.txt
new file mode 100644
index 0000000000000..709b1b4789efc
--- /dev/null
+++ b/offload/libacctarget/CMakeLists.txt
@@ -0,0 +1,75 @@
+message(STATUS "Building OpenACC offloading runtime library libacctarget.")
+
+set(ACCTARGET_SRC
+  Interface.cpp
+
+  DeviceManager.cpp
+  QueueManager.cpp
+
+  RuntimeImpl.cpp
+  RuntimeInterface.cpp
+  DataRuntimeInterface.cpp
+  CfiDataRuntimeInterface.cpp
+
+)
+
+add_llvm_library(acctarget
+  SHARED
+
+  ${ACCTARGET_SRC}
+
+  ADDITIONAL_HEADER_DIRS
+  ${LIBOMPTARGET_INCLUDE_DIR}
+  ${LIBOMPTARGET_BINARY_INCLUDE_DIR}
+
+  LINK_COMPONENTS
+  Support
+
+  LINK_LIBS
+  PUBLIC
+  ompaccsupport
+
+  NO_INSTALL_RPATH
+  BUILDTREE_ONLY
+)
+
+target_include_directories(acctarget PRIVATE
+  ${LIBOMPTARGET_INCLUDE_DIR}
+  ${LIBOMPTARGET_BINARY_INCLUDE_DIR}
+  ${CMAKE_CURRENT_SOURCE_DIR}/include
+)
+
+if(LLVM_HAVE_LINK_VERSION_SCRIPT)
+  target_link_libraries(acctarget PRIVATE "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports")
+  set_property(TARGET acctarget APPEND PROPERTY LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/exports)
+endif()
+
+add_dependencies(acctarget PluginErrcodes)
+
+target_compile_definitions(acctarget PRIVATE
+  TARGET_NAME=acctarget
+  DEBUG_PREFIX="acctarget"
+)
+
+target_link_libraries(acctarget PRIVATE ompaccsupport)
+
+target_compile_options(acctarget PRIVATE ${offload_compile_flags})
+target_link_options(acctarget PRIVATE ${offload_link_flags})
+
+# Link against flang_rt.runtime for Fortran descriptor support.
+# flang_rt.runtime is a sibling runtime; link against the shared library target.
+if(TARGET flang_rt.runtime.dynamic)
+  target_link_libraries(acctarget PRIVATE flang_rt.runtime.dynamic)
+elseif(TARGET flang_rt.runtime.static)
+  target_link_libraries(acctarget PRIVATE flang_rt.runtime.static)
+else()
+  message(FATAL_ERROR "flang_rt.runtime target not found")
+endif()
+
+# Let libacctarget.so find the libraries it depends on: next to itself when
+# installed, and additionally in the parent directory within the build tree.
+set_target_properties(acctarget PROPERTIES
+                      POSITION_INDEPENDENT_CODE ON
+                      INSTALL_RPATH "$ORIGIN"
+                      BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/..")
+install(TARGETS acctarget LIBRARY COMPONENT acctarget DESTINATION "${OFFLOAD_INSTALL_LIBDIR}")
diff --git a/offload/libacctarget/CfiDataRuntimeInterface.cpp b/offload/libacctarget/CfiDataRuntimeInterface.cpp
new file mode 100644
index 0000000000000..46d75f111457c
--- /dev/null
+++ b/offload/libacctarget/CfiDataRuntimeInterface.cpp
@@ -0,0 +1,297 @@
+//===- CfiDataRuntimeInterface.cpp ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DeviceManager.h"
+#include "Logger.h"
+#include "Private.h"
+#include "omptarget.h"
+
+#include "Interface.h"
+#include "include/openacc.h"
+
+using namespace llvm::acc::target;
+using namespace llvm::acc::target::debug;
+
+#define PREAMBLE()                                                             \
+  FUNC_LOGGER();                                                               \
+  AccDataDescF18 AccDesc{{TGT_ACC_DESC_F18}, &Desc->raw()};                    \
+  AccDataDescF18 *AccDataDescs[] = {&AccDesc};                                 \
+  void *ArgPtrs[] = {reinterpret_cast<void *>(&Desc->raw())};                  \
+  void *ArgBasePtrs[] = {nullptr};                                             \
+  int64_t ArgSizes[] = {0};                                                    \
+  int64_t ArgTypes[] = {TGT_ACC_MAPTYPE_NONE};
+
+extern "C" {
+int _cfi_acc_is_present_a(const Fortran::runtime::Descriptor *Desc) {
+  return accIsPresent(Desc->OffsetElement());
+}
+
+int _cfi_acc_create_a(Fortran::runtime::Descriptor *Desc) {
+  PREAMBLE();
+  __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                       ArgSizes, ArgTypes, nullptr, nullptr,
+                       reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                       acc_async_sync);
+  return 0;
+}
+int _cfi_acc_create_async_a(Fortran::runtime::Descriptor *Desc, int *Async) {
+  PREAMBLE();
+  __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                       ArgSizes, ArgTypes, nullptr, nullptr,
+                       reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+  return 0;
+}
+int _cfi_acc_pcreate_a(Fortran::runtime::Descriptor *Desc) {
+  PREAMBLE();
+  __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                       ArgSizes, ArgTypes, nullptr, nullptr,
+                       reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                       acc_async_sync);
+  return 0;
+}
+int _cfi_acc_pcreate_async_a(Fortran::runtime::Descriptor *Desc, int *Async) {
+  PREAMBLE();
+  __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                       ArgSizes, ArgTypes, nullptr, nullptr,
+                       reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+  return 0;
+}
+int _cfi_acc_present_or_create_a(Fortran::runtime::Descriptor *Desc) {
+  PREAMBLE();
+  __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                       ArgSizes, ArgTypes, nullptr, nullptr,
+                       reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                       acc_async_sync);
+  return 0;
+}
+int _cfi_acc_present_or_create_async_a(Fortran::runtime::Descriptor *Desc,
+                                       int *Async) {
+  PREAMBLE();
+  __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                       ArgSizes, ArgTypes, nullptr, nullptr,
+                       reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+  return 0;
+}
+
+int _cfi_acc_delete_a(Fortran::runtime::Descriptor *Desc) {
+  PREAMBLE();
+  __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                      ArgSizes, ArgTypes, nullptr, nullptr,
+                      reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                      acc_async_sync);
+  return 0;
+}
+int _cfi_acc_delete_async_a(Fortran::runtime::Descriptor *Desc, int *Async) {
+  PREAMBLE();
+  __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                      ArgSizes, ArgTypes, nullptr, nullptr,
+                      reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+  return 0;
+}
+int _cfi_acc_delete_finalize_a(Fortran::runtime::Descriptor *Desc) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FINALIZE;
+  __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                      ArgSizes, ArgTypes, nullptr, nullptr,
+                      reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                      acc_async_sync);
+  return 0;
+}
+int _cfi_acc_delete_finalize_async_a(Fortran::runtime::Descriptor *Desc,
+                                     int *Async) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FINALIZE;
+  __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                      ArgSizes, ArgTypes, nullptr, nullptr,
+                      reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+  return 0;
+}
+
+int _cfi_acc_copyin_a(Fortran::runtime::Descriptor *Desc) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+  __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                       ArgSizes, ArgTypes, nullptr, nullptr,
+                       reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                       acc_async_sync);
+  return 0;
+}
+int _cfi_acc_copyin_async_a(Fortran::runtime::Descriptor *Desc, int *Async) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+  __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                       ArgSizes, ArgTypes, nullptr, nullptr,
+                       reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+  return 0;
+}
+int _cfi_acc_pcopyin_a(Fortran::runtime::Descriptor *Desc) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+  __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                       ArgSizes, ArgTypes, nullptr, nullptr,
+                       reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                       acc_async_sync);
+  return 0;
+}
+int _cfi_acc_pcopyin_async_a(Fortran::runtime::Descriptor *Desc, int *Async) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+  __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                       ArgSizes, ArgTypes, nullptr, nullptr,
+                       reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+  return 0;
+}
+int _cfi_acc_present_or_copyin_a(Fortran::runtime::Descriptor *Desc) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+  __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                       ArgSizes, ArgTypes, nullptr, nullptr,
+                       reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                       acc_async_sync);
+  return 0;
+}
+int _cfi_acc_present_or_copyin_async_a(Fortran::runtime::Descriptor *Desc,
+                                       int *Async) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+  __tgt_acc_data_enter(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                       ArgSizes, ArgTypes, nullptr, nullptr,
+                       reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+  return 0;
+}
+
+int _cfi_acc_copyout_a(Fortran::runtime::Descriptor *Desc) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+  __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                      ArgSizes, ArgTypes, nullptr, nullptr,
+                      reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                      acc_async_sync);
+  return 0;
+}
+int _cfi_acc_copyout_async_a(Fortran::runtime::Descriptor *Desc, int *Async) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+  __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                      ArgSizes, ArgTypes, nullptr, nullptr,
+                      reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+  return 0;
+}
+
+int _cfi_acc_copyout_finalize_a(Fortran::runtime::Descriptor *Desc) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FROM | TGT_ACC_MAPTYPE_FINALIZE;
+  __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                      ArgSizes, ArgTypes, nullptr, nullptr,
+                      reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                      acc_async_sync);
+  return 0;
+}
+int _cfi_acc_copyout_finalize_async_a(Fortran::runtime::Descriptor *Desc,
+                                      int *Async) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FROM | TGT_ACC_MAPTYPE_FINALIZE;
+  __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                      ArgSizes, ArgTypes, nullptr, nullptr,
+                      reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+  return 0;
+}
+
+int _cfi_acc_update_device_a(Fortran::runtime::Descriptor *Desc) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+  __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                        ArgSizes, ArgTypes, nullptr, nullptr,
+                        reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                        acc_async_sync);
+  return 0;
+}
+int _cfi_acc_update_device_async_a(Fortran::runtime::Descriptor *Desc,
+                                   int *Async) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+  __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                        ArgSizes, ArgTypes, nullptr, nullptr,
+                        reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+  return 0;
+}
+
+int _cfi_acc_updatein_a(Fortran::runtime::Descriptor *Desc) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+  __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                        ArgSizes, ArgTypes, nullptr, nullptr,
+                        reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                        acc_async_sync);
+  return 0;
+}
+int _cfi_acc_updatein_async_a(Fortran::runtime::Descriptor *Desc, int *Async) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+  __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                        ArgSizes, ArgTypes, nullptr, nullptr,
+                        reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+  return 0;
+}
+
+int _cfi_acc_update_self_a(Fortran::runtime::Descriptor *Desc) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+  __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                        ArgSizes, ArgTypes, nullptr, nullptr,
+                        reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                        acc_async_sync);
+  return 0;
+}
+int _cfi_acc_update_self_async_a(Fortran::runtime::Descriptor *Desc,
+                                 int *Async) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+  __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                        ArgSizes, ArgTypes, nullptr, nullptr,
+                        reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+  return 0;
+}
+
+int _cfi_acc_update_host_a(Fortran::runtime::Descriptor *Desc) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+  __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                        ArgSizes, ArgTypes, nullptr, nullptr,
+                        reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                        acc_async_sync);
+  return 0;
+}
+int _cfi_acc_update_host_async_a(Fortran::runtime::Descriptor *Desc,
+                                 int *Async) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+  __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                        ArgSizes, ArgTypes, nullptr, nullptr,
+                        reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+  return 0;
+}
+
+int _cfi_acc_updateout_a(Fortran::runtime::Descriptor *Desc) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+  __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                        ArgSizes, ArgTypes, nullptr, nullptr,
+                        reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                        acc_async_sync);
+  return 0;
+}
+int _cfi_acc_updateout_async_a(Fortran::runtime::Descriptor *Desc, int *Async) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+  __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                        ArgSizes, ArgTypes, nullptr, nullptr,
+                        reinterpret_cast<AccDataDesc **>(AccDataDescs), *Async);
+  return 0;
+}
+}
diff --git a/offload/libacctarget/DataRuntimeInterface.cpp b/offload/libacctarget/DataRuntimeInterface.cpp
new file mode 100644
index 0000000000000..e6111932e8e77
--- /dev/null
+++ b/offload/libacctarget/DataRuntimeInterface.cpp
@@ -0,0 +1,204 @@
+//===- DataRuntimeInterface.cpp ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Logger.h"
+#include "Private.h"
+
+#include "Interface.h"
+#include "include/openacc.h"
+
+using namespace llvm::acc::target;
+using namespace llvm::acc::target::debug;
+
+#define PREAMBLE()                                                             \
+  FUNC_LOGGER();                                                               \
+  AccDataDescF18 *AccDataDescs[] = {nullptr};                                  \
+  void *ArgPtrs[] = {Ptr};                                                     \
+  void *ArgBasePtrs[] = {nullptr};                                             \
+  int64_t ArgSizes[] = {static_cast<int64_t>(Bytes)};                          \
+  int64_t ArgTypes[] = {TGT_ACC_MAPTYPE_NONE};
+
+extern "C" {
+int acc_is_present(void *Ptr) { return accIsPresent(Ptr); }
+
+void *acc_create(void *Ptr, size_t Bytes) {
+  return accDataEnter(nullptr, Ptr, Bytes, TGT_ACC_MAPTYPE_NONE,
+                      acc_async_sync);
+}
+void acc_create_async(void *Ptr, size_t Bytes, int Async) {
+  accDataEnter(nullptr, Ptr, Bytes, TGT_ACC_MAPTYPE_NONE, Async);
+}
+void *acc_pcreate(void *Ptr, size_t Bytes) { return acc_create(Ptr, Bytes); }
+void acc_pcreate_async(void *Ptr, size_t Bytes, int Async) {
+  acc_create_async(Ptr, Bytes, Async);
+}
+void *acc_present_or_create(void *Ptr, size_t Bytes) {
+  return acc_create(Ptr, Bytes);
+}
+void acc_present_or_create_async(void *Ptr, size_t Bytes, int Async) {
+  acc_create_async(Ptr, Bytes, Async);
+}
+
+void acc_delete(void *Ptr, size_t Bytes) {
+  PREAMBLE();
+  __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                      ArgSizes, ArgTypes, nullptr, nullptr,
+                      reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                      acc_async_sync);
+}
+void acc_delete_async(void *Ptr, size_t Bytes, int Async) {
+  PREAMBLE();
+  __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                      ArgSizes, ArgTypes, nullptr, nullptr,
+                      reinterpret_cast<AccDataDesc **>(AccDataDescs), Async);
+}
+void acc_delete_finalize(void *Ptr, size_t Bytes) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FINALIZE;
+  __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                      ArgSizes, ArgTypes, nullptr, nullptr,
+                      reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                      acc_async_sync);
+}
+void acc_delete_finalize_async(void *Ptr, size_t Bytes, int Async) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FINALIZE;
+  __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                      ArgSizes, ArgTypes, nullptr, nullptr,
+                      reinterpret_cast<AccDataDesc **>(AccDataDescs), Async);
+}
+
+void *acc_copyin(void *Ptr, size_t Bytes) {
+  return accDataEnter(nullptr, Ptr, Bytes, TGT_ACC_MAPTYPE_TO, acc_async_sync);
+}
+void acc_copyin_async(void *Ptr, size_t Bytes, int Async) {
+  accDataEnter(nullptr, Ptr, Bytes, TGT_ACC_MAPTYPE_TO, Async);
+}
+void *acc_pcopyin(void *Ptr, size_t Bytes) { return acc_copyin(Ptr, Bytes); }
+void acc_pcopyin_async(void *Ptr, size_t Bytes, int Async) {
+  acc_copyin_async(Ptr, Bytes, Async);
+}
+void *acc_present_or_copyin(void *Ptr, size_t Bytes) {
+  return acc_copyin(Ptr, Bytes);
+}
+void acc_present_or_copyin_async(void *Ptr, size_t Bytes, int Async) {
+  acc_copyin_async(Ptr, Bytes, Async);
+}
+
+void acc_copyout(void *Ptr, size_t Bytes) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+  __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                      ArgSizes, ArgTypes, nullptr, nullptr,
+                      reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                      acc_async_sync);
+}
+void acc_copyout_async(void *Ptr, size_t Bytes, int Async) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+  __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                      ArgSizes, ArgTypes, nullptr, nullptr,
+                      reinterpret_cast<AccDataDesc **>(AccDataDescs), Async);
+}
+
+void acc_copyout_finalize(void *Ptr, size_t Bytes) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FROM | TGT_ACC_MAPTYPE_FINALIZE;
+  __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                      ArgSizes, ArgTypes, nullptr, nullptr,
+                      reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                      acc_async_sync);
+}
+void acc_copyout_finalize_async(void *Ptr, size_t Bytes, int Async) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FROM | TGT_ACC_MAPTYPE_FINALIZE;
+  __tgt_acc_data_exit(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                      ArgSizes, ArgTypes, nullptr, nullptr,
+                      reinterpret_cast<AccDataDesc **>(AccDataDescs), Async);
+}
+
+void acc_update_device(void *Ptr, size_t Bytes) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+  __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                        ArgSizes, ArgTypes, nullptr, nullptr,
+                        reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                        acc_async_sync);
+}
+void acc_update_device_async(void *Ptr, size_t Bytes, int Async) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+  __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                        ArgSizes, ArgTypes, nullptr, nullptr,
+                        reinterpret_cast<AccDataDesc **>(AccDataDescs), Async);
+}
+
+void acc_updatein(void *Ptr, size_t Bytes) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+  __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                        ArgSizes, ArgTypes, nullptr, nullptr,
+                        reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                        acc_async_sync);
+}
+void acc_updatein_async(void *Ptr, size_t Bytes, int Async) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_TO;
+  __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                        ArgSizes, ArgTypes, nullptr, nullptr,
+                        reinterpret_cast<AccDataDesc **>(AccDataDescs), Async);
+}
+
+void acc_update_self(void *Ptr, size_t Bytes) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+  __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                        ArgSizes, ArgTypes, nullptr, nullptr,
+                        reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                        acc_async_sync);
+}
+void acc_update_self_async(void *Ptr, size_t Bytes, int Async) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+  __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                        ArgSizes, ArgTypes, nullptr, nullptr,
+                        reinterpret_cast<AccDataDesc **>(AccDataDescs), Async);
+}
+
+void acc_update_host(void *Ptr, size_t Bytes) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+  __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                        ArgSizes, ArgTypes, nullptr, nullptr,
+                        reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                        acc_async_sync);
+}
+void acc_update_host_async(void *Ptr, size_t Bytes, int Async) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+  __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                        ArgSizes, ArgTypes, nullptr, nullptr,
+                        reinterpret_cast<AccDataDesc **>(AccDataDescs), Async);
+}
+
+void acc_updateout(void *Ptr, size_t Bytes) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+  __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                        ArgSizes, ArgTypes, nullptr, nullptr,
+                        reinterpret_cast<AccDataDesc **>(AccDataDescs),
+                        acc_async_sync);
+}
+void acc_updateout_async(void *Ptr, size_t Bytes, int Async) {
+  PREAMBLE();
+  ArgTypes[0] = TGT_ACC_MAPTYPE_FROM;
+  __tgt_acc_data_update(nullptr, 0, acc_device_default, 1, ArgBasePtrs, ArgPtrs,
+                        ArgSizes, ArgTypes, nullptr, nullptr,
+                        reinterpret_cast<AccDataDesc **>(AccDataDescs), Async);
+}
+}
diff --git a/offload/libacctarget/Debug.h b/offload/libacctarget/Debug.h
new file mode 100644
index 0000000000000..917b1f6ce6e2e
--- /dev/null
+++ b/offload/libacctarget/Debug.h
@@ -0,0 +1,24 @@
+//===- Debug.h --------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OFFLOAD_LIBACCTARGET_DEBUG_H
+#define OFFLOAD_LIBACCTARGET_DEBUG_H
+
+namespace llvm::acc::target::debug {
+
+// Debug type names used by libacctarget, one string constant per category.
+constexpr const char *ADT_Init = "ACCInit";
+constexpr const char *ADT_Mapping = "ACCMapping";
+constexpr const char *ADT_Descriptor = "ACCDescriptor";
+constexpr const char *ADT_Queue = "ACCQueue";
+constexpr const char *ADT_Interface = "ACCInterface";
+constexpr const char *ADT_Kernel = "ACCKernel";
+
+} // namespace llvm::acc::target::debug
+
+#endif // OFFLOAD_LIBACCTARGET_DEBUG_H
diff --git a/offload/libacctarget/DeviceManager.cpp b/offload/libacctarget/DeviceManager.cpp
new file mode 100644
index 0000000000000..d546c035f8331
--- /dev/null
+++ b/offload/libacctarget/DeviceManager.cpp
@@ -0,0 +1,231 @@
+//===- DeviceManager.cpp ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DeviceManager.h"
+#include "PluginManager.h"
+#include "openacc.h"
+
+// OpenACC 3.4, sec. 2.3.1 "Modifying and Retrieving ICV Values"
+// Each host thread needs its own value, thus these are `thread_local`.
+//
+// The DeviceManager owns these ICVs and they should not be accessible outside
+// and are thus static.
+namespace llvm::acc::target::icv {
+/// OpenACC 3.4, sec. 2.3 "Internal Control Variables"
+/// "acc-current-device-num-var - controls which device of the selected type is
+/// used."
+/// Holds one entry per concrete device type; every entry starts at 0.
+/// TODO can we use PerThreadTable here?
+static thread_local std::array<DeviceManagerTy::DeviceIdTy,
+                               AccDeviceNumConcreteTypes>
+    AccCurrentDeviceNumVar = {0};
+/// OpenACC 3.4, sec. 2.3 "Internal Control Variables"
+/// "acc-current-device-type-var - controls which type of device is used."
+/// Starts as `acc_device_default` in every thread.
+static thread_local acc_device_t AccCurrentDeviceTypeVar = acc_device_default;
+/// The device type to use when the default is asked for. Initially we set it to
+/// none. When the plugins get initialized we will set the default to one of the
+/// target device types we have available.
+/// Unlike the two ICVs above this variable is not `thread_local`, so it is
+/// shared by all host threads.
+static acc_device_t AccCurrentDefaultDeviceTypeVar = acc_device_none;
+
+} // namespace llvm::acc::target::icv
+
+namespace llvm::acc::target {
+/// Definition of the global device manager pointer declared in
+/// DeviceManager.h. It starts out null.
+/// NOTE(review): presumably assigned during runtime initialization outside
+/// this file - confirm.
+DeviceManagerTy *DM = nullptr;
+} // namespace llvm::acc::target
+
+using namespace llvm::acc::target;
+
+/// Returns a printable name for \p DeviceType. Concrete target types map to
+/// their plain name, the placeholder types are wrapped in angle brackets, and
+/// every other value yields "<unknown>".
+static const char *accDeviceToStr(acc_device_t DeviceType) {
+  if (DeviceType == acc_device_nvidia)
+    return "nvidia";
+  if (DeviceType == acc_device_amd)
+    return "amd";
+  if (DeviceType == acc_device_spirv)
+    return "spirv";
+  if (DeviceType == acc_device_none)
+    return "<none>";
+  if (DeviceType == acc_device_default)
+    return "<default>";
+  if (DeviceType == acc_device_host)
+    return "<host>";
+  if (DeviceType == acc_device_not_host)
+    return "<not_host>";
+  return "<unknown>";
+}
+
+/// Prints \p DeviceType as `name (numeric value)` and returns \p OS so that
+/// the call can be chained.
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+                                     acc_device_t DeviceType) {
+  const int NumericValue = static_cast<int>(DeviceType);
+  OS << accDeviceToStr(DeviceType);
+  OS << " (" << NumericValue << ")";
+  return OS;
+}
+
+/// Returns the table that translates OpenACC device numbers of
+/// \p DeviceType into PluginManager device ids.
+DeviceManagerTy::SingleDeviceTypeMapTy &
+DeviceManagerTy::getSingleDeviceTypeMap(acc_device_t DeviceType) {
+  // PMDeviceMap only has slots for the concrete device types, which are
+  // numbered starting at AccDeviceTypeOffset, so shift the enumerator down.
+  const auto Slot = DeviceType - AccDeviceTypeOffset;
+  return PMDeviceMap[Slot];
+}
+
+/// Builds the initial device mapping and lets the refresh pick the default
+/// device type.
+void DeviceManagerTy::init() {
+  refreshDeviceMapping(/*UpdateDeviceType=*/true);
+}
+
+/// Counterpart of init(); currently there is nothing to tear down.
+void DeviceManagerTy::deinit() {}
+
+/// Rebuilds the tables that translate OpenACC device numbers into
+/// PluginManager device ids from the devices the PluginManager currently
+/// exposes. Devices are classified by the triple architecture of their plugin
+/// (nvptx64 -> nvidia, amdgcn -> amd, spirv64 -> spirv); devices of any other
+/// architecture are ignored.
+///
+/// \param UpdateDeviceType if true, the default device type is set to the
+/// first type in the priority order nvidia, amd, spirv, host that is a
+/// concrete type and has at least one device. If none qualifies the default
+/// device type is left unchanged.
+void DeviceManagerTy::refreshDeviceMapping(bool UpdateDeviceType) {
+  auto IsConcreteType = [](acc_device_t Type) {
+    return Type >= acc_device_concrete_type_begin &&
+           Type < acc_device_concrete_type_end;
+  };
+
+  // Drop the stale mapping of *every* concrete device type. Clearing only one
+  // of them would duplicate the entries of the others on each refresh.
+  for (int DeviceTypeInt = acc_device_concrete_type_begin;
+       DeviceTypeInt < acc_device_concrete_type_end; DeviceTypeInt++)
+    getSingleDeviceTypeMap(static_cast<acc_device_t>(DeviceTypeInt)).clear();
+
+  auto ExclusiveDevicesAccessor = PM->getExclusiveDevicesAccessor();
+  for (DeviceTy &Device : PM->devices(ExclusiveDevicesAccessor)) {
+    if (Device.RTL->getTripleArch() == llvm::Triple::nvptx64) {
+      getSingleDeviceTypeMap(acc_device_nvidia).push_back(Device.DeviceID);
+    } else if (Device.RTL->getTripleArch() == llvm::Triple::amdgcn) {
+      getSingleDeviceTypeMap(acc_device_amd).push_back(Device.DeviceID);
+    } else if (Device.RTL->getTripleArch() == llvm::Triple::spirv64) {
+      getSingleDeviceTypeMap(acc_device_spirv).push_back(Device.DeviceID);
+    }
+  }
+
+  ODBG() << "Refreshed OpenACC devices:";
+  for (int DeviceTypeInt = acc_device_concrete_type_begin;
+       DeviceTypeInt < acc_device_concrete_type_end; DeviceTypeInt++) {
+    acc_device_t DeviceType = static_cast<acc_device_t>(DeviceTypeInt);
+    unsigned Num = getSingleDeviceTypeMap(DeviceType).size();
+    ODBG() << "  Type " << DeviceType;
+    for (unsigned I = 0; I < Num; I++) {
+      ODBG() << "    OpenACC Device #" << I << " -> PM Device #"
+             << getSingleDeviceTypeMap(DeviceType)[I];
+    }
+  }
+
+  if (UpdateDeviceType) {
+    // Set the default current device type to a device we have available in the
+    // below order of priority.
+    auto CheckType = [&](acc_device_t Type) {
+      // Only concrete types own a slot in the device map. Test that first so
+      // that a placeholder type never indexes outside of the map.
+      if (!IsConcreteType(Type) || getSingleDeviceTypeMap(Type).empty())
+        return false;
+      ODBG() << "Updating AccCurrentDefaultDeviceTypeVar to " << Type;
+      icv::AccCurrentDefaultDeviceTypeVar = Type;
+      return true;
+    };
+    const acc_device_t Priority[] = {acc_device_nvidia, acc_device_amd,
+                                     acc_device_spirv, acc_device_host};
+    for (acc_device_t Type : Priority)
+      if (CheckType(Type))
+        break;
+  }
+}
+
+/// Returns true if \p DeviceType is one of the concrete device types, i.e. a
+/// type that owns a slot in the per-type tables.
+static bool isConcreteDeviceType(acc_device_t DeviceType) {
+  return DeviceType >= acc_device_concrete_type_begin &&
+         DeviceType < acc_device_concrete_type_end;
+}
+
+/// Replaces the placeholder types by the type they currently stand for:
+/// `acc_device_none` becomes the current device type of the calling thread and
+/// `acc_device_default` becomes the default device type. Other values are
+/// returned unchanged.
+static acc_device_t resolveDeviceType(acc_device_t DeviceType) {
+  if (DeviceType == acc_device_none)
+    DeviceType = icv::AccCurrentDeviceTypeVar;
+  if (DeviceType == acc_device_default)
+    DeviceType = icv::AccCurrentDefaultDeviceTypeVar;
+  return DeviceType;
+}
+
+/// Returns the acc-current-device-num-var entry of the calling thread for
+/// \p DeviceType. The ICV array only has one slot per concrete type, so the
+/// type is resolved first and then shifted by AccDeviceTypeOffset, exactly
+/// like the PluginManager device map is indexed.
+static DeviceManagerTy::DeviceIdTy &currentDeviceNum(acc_device_t DeviceType) {
+  DeviceType = resolveDeviceType(DeviceType);
+  assert(isConcreteDeviceType(DeviceType) &&
+         "Device numbers are only tracked for concrete device types");
+  return icv::AccCurrentDeviceNumVar[DeviceType - AccDeviceTypeOffset];
+}
+
+/// Returns the PluginManager device id of the current device of
+/// \p DeviceType. `acc_device_none` selects the current device type and
+/// `acc_device_default` the default device type.
+int DeviceManagerTy::getPMDeviceId(acc_device_t DeviceType) {
+  ODBG() << "Getting device for " << DeviceType;
+  if (DeviceType == acc_device_none) {
+    DeviceType = icv::AccCurrentDeviceTypeVar;
+    ODBG() << "Correcting to current type " << DeviceType;
+  }
+  if (DeviceType == acc_device_default) {
+    ODBG() << "Corrected to value of AccCurrentDefaultDeviceTypeVar: "
+           << icv::AccCurrentDefaultDeviceTypeVar;
+    DeviceType = icv::AccCurrentDefaultDeviceTypeVar;
+  }
+  const DeviceIdTy DeviceNum = currentDeviceNum(DeviceType);
+  ODBG() << "Current device has id " << DeviceNum;
+  checkICVs();
+  SingleDeviceTypeMapTy &Map = getSingleDeviceTypeMap(DeviceType);
+  // checkICVs() only validates the number of the current device type, so make
+  // sure the number of the requested type is usable as well.
+  assert(DeviceNum >= 0 && DeviceNum < static_cast<DeviceIdTy>(Map.size()) &&
+         "Current device number is out of range for the device type");
+  return Map[DeviceNum];
+}
+
+/// Returns the PluginManager device id of the current device of the current
+/// device type.
+int DeviceManagerTy::getPMDeviceId() {
+  ODBG() << "Getting current device, type " << icv::AccCurrentDeviceTypeVar;
+  checkICVs();
+  return getPMDeviceId(icv::AccCurrentDeviceTypeVar);
+}
+
+/// Returns the current OpenACC device number of \p DeviceType for the calling
+/// thread. Placeholder types are resolved as in getPMDeviceId().
+int DeviceManagerTy::getDeviceId(acc_device_t DeviceType) {
+  checkICVs();
+  return currentDeviceNum(DeviceType);
+}
+
+/// Logs the ICVs of the calling thread and asserts that they are consistent:
+/// the current device type must be the default or a concrete type, and the
+/// current device number of that type must refer to an existing device.
+void DeviceManagerTy::checkICVs() {
+  ODBG() << "acc-current-device-type = " << icv::AccCurrentDeviceTypeVar;
+  for (int DeviceTypeInt = acc_device_concrete_type_begin;
+       DeviceTypeInt < acc_device_concrete_type_end; DeviceTypeInt++) {
+    acc_device_t DeviceType = static_cast<acc_device_t>(DeviceTypeInt);
+    ODBG() << "acc-current-device-num[" << DeviceType
+           << "] = " << currentDeviceNum(DeviceType);
+  }
+  assert((icv::AccCurrentDeviceTypeVar == acc_device_default ||
+          isConcreteDeviceType(icv::AccCurrentDeviceTypeVar)) &&
+         "acc-current-device-type must be the default or a concrete type");
+  acc_device_t DeviceType = icv::AccCurrentDeviceTypeVar;
+  if (DeviceType == acc_device_default) {
+    DeviceType = icv::AccCurrentDefaultDeviceTypeVar;
+    ODBG() << "Corrected to value of AccCurrentDefaultDeviceTypeVar: "
+           << icv::AccCurrentDefaultDeviceTypeVar;
+  }
+  // No default device type has been selected, e.g. because no device was
+  // found. There is no device number to validate then.
+  if (!isConcreteDeviceType(DeviceType)) {
+    ODBG() << "No concrete device type selected, skipping device number check";
+    return;
+  }
+  const DeviceIdTy DeviceNum = currentDeviceNum(DeviceType);
+  ODBG() << DeviceNum;
+  assert(DeviceNum >= 0 &&
+         DeviceNum <
+             static_cast<DeviceIdTy>(getSingleDeviceTypeMap(DeviceType).size()));
+  (void)DeviceNum;
+}
+
+/// Returns the number of devices of \p DeviceType. Placeholder types are
+/// resolved as in getPMDeviceId(); a type that does not resolve to a concrete
+/// type has no device table and yields 0.
+/// NOTE(review): `acc_device_host` and `acc_device_not_host` therefore report
+/// 0 - confirm whether they should be counted differently.
+int DeviceManagerTy::getNumDevices(acc_device_t DeviceType) {
+  checkICVs();
+  DeviceType = resolveDeviceType(DeviceType);
+  if (!isConcreteDeviceType(DeviceType))
+    return 0;
+  return getSingleDeviceTypeMap(DeviceType).size();
+}
+
+/// Sets the current device number of every concrete device type to
+/// \p DevNum for the calling thread.
+void DeviceManagerTy::setAllDeviceId(int DevNum) {
+  for (auto &CurrDevNum : icv::AccCurrentDeviceNumVar) {
+    CurrDevNum = DevNum;
+  }
+  checkICVs();
+}
+
+/// Sets the current device number of \p DeviceType to \p DevNum for the
+/// calling thread. Placeholder types are resolved as in getPMDeviceId().
+void DeviceManagerTy::setDeviceId(acc_device_t DeviceType, int DevNum) {
+  currentDeviceNum(DeviceType) = DevNum;
+  checkICVs();
+}
+
+/// Sets the current device number of the current device type to \p DevNum.
+void DeviceManagerTy::setDeviceId(int DevNum) {
+  setDeviceId(icv::AccCurrentDeviceTypeVar, DevNum);
+  // The two-argument overload already ran checkICVs(); this repeats it.
+  checkICVs();
+}
+
+/// Returns the current device type of the calling thread after validating the
+/// ICVs. The value may be `acc_device_default`; it is not resolved here.
+acc_device_t DeviceManagerTy::getDeviceType() {
+  checkICVs();
+  return icv::AccCurrentDeviceTypeVar;
+}
+
+/// Sets the current device type of the calling thread to \p DeviceType and
+/// then validates the ICVs.
+void DeviceManagerTy::setDeviceType(acc_device_t DeviceType) {
+  icv::AccCurrentDeviceTypeVar = DeviceType;
+  checkICVs();
+}
+
+/// Not implemented yet: reports a fatal error and ignores all arguments.
+/// NOTE(review): the trailing `return 0` is only reached if `REPORT_FATAL()`
+/// returns - confirm against its definition.
+size_t DeviceManagerTy::getDeviceProperty(int, acc_device_t,
+                                          acc_device_property_t) {
+  REPORT_FATAL() << "device properties not yet implemented";
+  return 0;
+}
+
+/// Not implemented yet: reports a fatal error and ignores all arguments.
+/// NOTE(review): the trailing `return ""` is only reached if `REPORT_FATAL()`
+/// returns - confirm against its definition.
+const char *DeviceManagerTy::getDevicePropertyString(int, acc_device_t,
+                                                     acc_device_property_t) {
+  REPORT_FATAL() << "device properties not yet implemented";
+  return "";
+}
+
+/// Looks up the PluginManager device that is the current device of
+/// \p DeviceType. The result of `PM->getDevice` is forwarded unchanged.
+llvm::Expected<DeviceTy &> DeviceManagerTy::getDevice(acc_device_t DeviceType) {
+  return PM->getDevice(getPMDeviceId(DeviceType));
+}
+
+/// Looks up the PluginManager device that is the current device of the
+/// current device type. The result of `PM->getDevice` is forwarded unchanged.
+llvm::Expected<DeviceTy &> DeviceManagerTy::getDevice() {
+  return PM->getDevice(getPMDeviceId());
+}
diff --git a/offload/libacctarget/DeviceManager.h b/offload/libacctarget/DeviceManager.h
new file mode 100644
index 0000000000000..d825eeb1c2160
--- /dev/null
+++ b/offload/libacctarget/DeviceManager.h
@@ -0,0 +1,75 @@
+//===- DeviceManager.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ACC_OFFLOAD_DEVICE_MANAGER_H_
+#define LLVM_ACC_OFFLOAD_DEVICE_MANAGER_H_
+
+#include "include/openacc.h"
+#include "omptarget.h"
+#include <array>
+#include <cstddef>
+
+/// Prints an `acc_device_t` as `name (numeric value)`. The definition lives in
+/// DeviceManager.cpp.
+/// NOTE(review): the function is declared `inline` but this header does not
+/// define it, so other translation units that stream an `acc_device_t` would
+/// have no definition available - confirm this is intended.
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+                                     acc_device_t DeviceType);
+
+namespace llvm::acc::target {
+
+/// Number of concrete device types, i.e. the width of the half-open range
+/// [acc_device_concrete_type_begin, acc_device_concrete_type_end).
+constexpr int AccDeviceNumConcreteTypes =
+    acc_device_concrete_type_end - acc_device_concrete_type_begin;
+/// Value to subtract from a concrete `acc_device_t` to obtain a zero-based
+/// index into a table with one slot per concrete device type.
+constexpr int AccDeviceTypeOffset = acc_device_concrete_type_begin;
+
+/// Translates between the OpenACC view of devices (a device type plus a
+/// device number within that type) and the device ids of the PluginManager,
+/// and gives access to the device related ICVs.
+class DeviceManagerTy {
+public:
+  /// Integer type used for device ids and device numbers.
+  using DeviceIdTy = int64_t;
+
+private:
+  /// For one device type: OpenACC device number -> PluginManager device id.
+  using SingleDeviceTypeMapTy = llvm::SmallVector<DeviceIdTy, 8>;
+  /// One SingleDeviceTypeMapTy per concrete device type.
+  using AllDeviceTypeMap =
+      std::array<SingleDeviceTypeMapTy, AccDeviceNumConcreteTypes>;
+  AllDeviceTypeMap PMDeviceMap;
+
+  /// Returns the map of \p DeviceType (indexed relative to
+  /// AccDeviceTypeOffset).
+  SingleDeviceTypeMapTy &getSingleDeviceTypeMap(acc_device_t DeviceType);
+
+public:
+  /// Builds the initial device mapping, see refreshDeviceMapping().
+  void init();
+  /// Counterpart of init().
+  void deinit();
+
+  // Refreshes the device mapping according to the devices currently known to
+  // the PluginManager. If `UpdateDeviceType` is true the default device type
+  // is updated to one of the device types that have a device available.
+  void refreshDeviceMapping(bool UpdateDeviceType);
+
+  // Entry points for ACC APIs.
+  /// Current device number of \p DeviceType for the calling thread.
+  int getDeviceId(acc_device_t DeviceType);
+  /// Number of devices of \p DeviceType.
+  int getNumDevices(acc_device_t DeviceType);
+
+  /// Sets the current device number of every device type.
+  void setAllDeviceId(int DeviceId);
+  /// Sets the current device number of \p DeviceType.
+  void setDeviceId(acc_device_t DeviceType, int DeviceId);
+  /// Sets the current device number of the current device type.
+  void setDeviceId(int DeviceId);
+
+  /// Gets/sets the current device type of the calling thread.
+  acc_device_t getDeviceType();
+  void setDeviceType(acc_device_t DeviceType);
+
+  /// Device property queries; the implementation in DeviceManager.cpp
+  /// currently reports a fatal error for both.
+  size_t getDeviceProperty(int DeviceId, acc_device_t DeviceType,
+                           acc_device_property_t DeviceProperty);
+  const char *getDevicePropertyString(int DeviceId, acc_device_t DeviceType,
+                                      acc_device_property_t DeviceProperty);
+
+  // Verification.
+  /// Logs the ICVs and asserts that they are consistent.
+  void checkICVs();
+
+  // Obtaining the device ID for use with PluginManager.
+  /// PluginManager id of the current device of \p DeviceType.
+  int getPMDeviceId(acc_device_t DeviceType);
+  /// PluginManager id of the current device of the current device type.
+  int getPMDeviceId();
+
+  /// Same lookups as getPMDeviceId(), but returning the device object
+  /// obtained from the PluginManager.
+  llvm::Expected<DeviceTy &> getDevice(acc_device_t DeviceType);
+  llvm::Expected<DeviceTy &> getDevice();
+};
+
+/// Global device manager; defined in DeviceManager.cpp, where it starts as
+/// nullptr.
+extern DeviceManagerTy *DM;
+} // namespace llvm::acc::target
+
+#endif // LLVM_ACC_OFFLOAD_DEVICE_MANAGER_H_
diff --git a/offload/libacctarget/Interface.cpp b/offload/libacctarget/Interface.cpp
new file mode 100644
index 0000000000000..002813635cc00
--- /dev/null
+++ b/offload/libacctarget/Interface.cpp
@@ -0,0 +1,2043 @@
+//===- Interface.cpp --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Interface.h"
+#include "Debug.h"
+#include "DeviceManager.h"
+#include "Logger.h"
+#include "OpenMP/Mapping.h"
+#include "PluginManager.h"
+#include "Private.h"
+#include "QueueManager.h"
+#include "Shared/APITypes.h"
+#include "Shared/Debug.h"
+#include "Shared/SourceInfo.h"
+#include "device.h"
+#include "omptarget.h"
+#include "openacc.h"
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+#include <memory>
+#include <mutex>
+#include <numeric>
+#include <optional>
+#include <sstream>
+#include <string.h>
+#include <string>
+#include <type_traits>
+#include <variant>
+#include <vector>
+
+using namespace llvm::acc::target;
+using namespace llvm::acc::target::debug;
+
+using llvm::SmallVector;
+
+namespace {
+// Fixed-width copies of the special async values from openacc.h.
+constexpr int32_t AccAsyncSync = acc_async_sync;
+constexpr int32_t AccAsyncDefault = acc_async_default;
+constexpr int32_t AccAsyncNoval = acc_async_noval;
+// Runtime-internal value; it is the initial value of acc-default-async-var.
+// NOTE(review): presumably chosen not to collide with the special values
+// above - confirm against openacc.h.
+constexpr int32_t AccAsyncDefaultQueue = -5;
+} // namespace
+
+namespace llvm::acc::target::icv {
+// acc-default-async-var
+// Kept per host thread (`thread_local`) and initialized to the internal
+// AccAsyncDefaultQueue value.
+thread_local int32_t AccDefaultAsyncVar = AccAsyncDefaultQueue;
+} // namespace llvm::acc::target::icv
+
+namespace {
+// TODO hook up to some env var
+// When set, a missing device mapping is reported as a fatal error on data end
+// unless the mapping was requested as no_create (see handleSingleDataEnd).
+bool Pedantic = true;
+
+// Stream fragments for dump/debug output; each expands to the spelling of the
+// argument followed by a space and its value.
+// STR_AND_VAL:      value streamed as is.
+// STR_AND_VALI:     value cast to int64_t first (e.g. for `unsigned char`).
+// OPT_STR_AND_VAL:  for optional-like values; prints "(nil)" when X is false,
+//                   otherwise std::to_string(*X).
+// SOPT_STR_AND_VAL: for values that may be null; prints "(nil)" when X is
+//                   false, otherwise X itself.
+// Note that the arguments are not parenthesized and may be evaluated twice.
+#define STR_AND_VAL(X) #X << " " << X
+#define STR_AND_VALI(X) #X << " " << (int64_t)X
+#define OPT_STR_AND_VAL(X) #X << " " << (X ? std::to_string(*X) : "(nil)")
+#define SOPT_STR_AND_VAL(X) #X << " " << (X ? X : "(nil)")
+
+/// Information needed to map a descriptor object itself (as opposed to the
+/// memory it refers to).
+struct DescMappingInfoTy {
+  // The size of the descriptor
+  size_t DescriptorSize = 0;
+  // The offset in the host descriptor where the pointer to the raw memory is
+  // stored.
+  size_t RawMemoryPtrOffset = 0;
+};
+
+/// Describes the raw memory that should be mapped: its address, the base
+/// address it is relative to, and optionally its size and a descriptor for
+/// non-contiguous copies.
+struct MemMappingInfoTy {
+  void *RawMemoryPtr = nullptr;
+  void *RawMemoryBasePtr = nullptr;
+  std::optional<uint64_t> RawMemorySize = std::nullopt;
+  std::optional<NonContigDescTy> CopyDesc = std::nullopt;
+
+  /// Returns the distance from RawMemoryBasePtr to RawMemoryPtr.
+  ptrdiff_t getBaseDelta() {
+    const auto Addr = reinterpret_cast<intptr_t>(RawMemoryPtr);
+    const auto BaseAddr = reinterpret_cast<intptr_t>(RawMemoryBasePtr);
+    return Addr - BaseAddr;
+  }
+
+  /// Asserts that a raw memory pointer has been set.
+  void verify() { assert(RawMemoryPtr); }
+
+  /// Writes the members to \p OS; for CopyDesc only its presence is printed.
+  void dump(llvm::raw_ostream &OS) {
+    OS << "MemMappingInfoTy:\n";
+    OS << " " << STR_AND_VAL(RawMemoryPtr);
+    OS << " " << STR_AND_VAL(RawMemoryBasePtr);
+    OS << " " << OPT_STR_AND_VAL(RawMemorySize);
+    OS << " " << !!CopyDesc;
+    OS << "\n";
+  }
+};
+
+/// Bounds of one array dimension as used by ArrayInfo. The members have no
+/// default initializers.
+struct AccArrayDim {
+  // Offset of the section within the dimension, counted in multiples of Stride.
+  long Offset;
+  // Distance between consecutive elements of this dimension, in units of
+  // ArrayInfo::ElementSize. Negative until ArrayInfo::normalizeStrides ran.
+  long Stride;
+  // Number of elements of the section; ArrayInfo treats -1 as "unknown".
+  long Size;
+  // Number of elements of the whole dimension; ArrayInfo treats -1 as
+  // "unknown".
+  long Extent;
+};
+
+/// Shape of a (possibly strided) host array section together with the address
+/// and size of the raw memory backing it. The descriptor specific collectors
+/// fill it in; normalize() then rewrites it into a form with non-negative
+/// strides and zero offsets.
+struct ArrayInfo {
+  /// Per-dimension bounds. verify() expects the strides to increase strictly
+  /// with the dimension index, so the last entry has the largest stride.
+  std::vector<AccArrayDim> Dims;
+  // The size of the raw memory allocation.
+  std::optional<uint64_t> RawMemorySize = {};
+  // The address of the host memory to be copied.
+  void *RawMemoryAddr = nullptr;
+  // Size of the array element.
+  int64_t ElementSize = 0;
+
+  void setPtr(void *Ptr) { RawMemoryAddr = Ptr; }
+
+  /// Returns the number of bytes spanned by dimension \p I, computed as
+  /// count * stride * ElementSize, or std::nullopt if neither the size nor the
+  /// extent of the dimension is known (both are -1). A negative stride is
+  /// reported as a fatal error at \p Loc.
+  std::optional<size_t> getSizeInDim(ident_t *Loc, unsigned I) {
+    if (Dims[I].Stride < 0) {
+      REPORT_FATAL() << Loc << "Unsupported negative stride";
+    }
+
+    auto TrySize = [&](int64_t Size) -> std::optional<size_t> {
+      if (Size == -1) {
+        return std::nullopt;
+      }
+      return Size * Dims[I].Stride * ElementSize;
+    };
+
+    // Prefer the `size` instead of `extent`. This is due to cases like this:
+    //
+    // real a0(100)
+    // call acc_copyin(a0(1:99))
+    // !$acc present(a0(1:99))
+    //
+    // Where the `acc_copyin` will allocate space for 99 elements because we
+    // parse the flang descriptor which only contains information on the size
+    // (99), but then if we use the `extent` from the `acc present`, we would
+    // require 100 elements, which is larger than the previously allocated
+    // memory. Thus, we use the `size`.
+
+    if (auto Size = TrySize(Dims[I].Size)) {
+      return Size;
+    }
+    if (auto Size = TrySize(Dims[I].Extent)) {
+      return Size;
+    }
+
+    return std::nullopt;
+  }
+
+  /// Sets RawMemorySize to ElementSize for a rank-0 array and otherwise to the
+  /// byte size of the last dimension (which may be unknown). In builds with
+  /// assertions it also checks that no other dimension spans more bytes.
+  void computeSizeFromDims(ident_t *Loc) {
+    if (Dims.size() == 0) {
+      RawMemorySize = ElementSize;
+      return;
+    }
+
+    std::optional<size_t> LargestSize = getSizeInDim(Loc, Dims.size() - 1);
+    RawMemorySize = LargestSize;
+    ODBG(ADT_Descriptor) << "Computed " << OPT_STR_AND_VAL(RawMemorySize);
+
+#ifndef NDEBUG
+    if (!LargestSize) {
+      return;
+    }
+    for (unsigned I = 0; I + 1 < Dims.size(); I++) {
+      auto Size = getSizeInDim(Loc, I);
+      assert(!Size || *Size <= *LargestSize);
+    }
+#endif
+  }
+
+  /// Makes all strides non-negative and then folds all offsets into
+  /// RawMemoryAddr. The order matters: normalizeOffsets() requires normalized
+  /// strides.
+  void normalize() {
+    normalizeStrides();
+    normalizeOffsets();
+  }
+
+  /// Returns true if no dimension has a negative stride.
+  bool hasNormalizedStrides() {
+    for (std::size_t i = 0; i < Dims.size(); i++) {
+      if (Dims[i].Stride < 0) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /// Flips every negative stride to its absolute value, mirrors the offset of
+  /// that dimension and moves RawMemoryAddr down accordingly.
+  void normalizeStrides() {
+    FUNC_LOGGER();
+    ODBG_IF([&]() { dump(llvm::dbgs()); });
+
+    if (hasNormalizedStrides()) {
+      ODBG(ADT_Descriptor) << "No normalization needed.";
+      return;
+    }
+    ODBG(ADT_Descriptor) << "Descriptor needs normalization.";
+
+    // The runtime cannot map negative stride arrays.  So we must find the base
+    // address of the host pointer and then invert the descriptor so that the
+    // strides in all dimensions are positive. The base pointer delta will be
+    // used to attach the adjusted device pointer to the array descriptor - that
+    // is, the F18 descriptor will contain the end address of the array because
+    // that is what the compiler assumes.
+    int64_t baseHostPtrDeltaInBytes = 0;
+    for (std::size_t i = 0; i < Dims.size(); i++) {
+      if (Dims[i].Stride < 0) {
+        Dims[i].Stride = -Dims[i].Stride;
+        Dims[i].Offset = Dims[i].Extent - Dims[i].Size - Dims[i].Offset;
+
+        // For each negative stride, skip to previously accounted array.
+        baseHostPtrDeltaInBytes += Dims[i].Stride * (Dims[i].Extent - 1);
+      }
+    }
+
+    // The delta was accumulated in elements; convert it to bytes.
+    baseHostPtrDeltaInBytes *= ElementSize;
+
+    RawMemoryAddr =
+        reinterpret_cast<char *>(RawMemoryAddr) - baseHostPtrDeltaInBytes;
+
+    ODBG(ADT_Descriptor) << "Normalized:";
+    ODBG_IF([&]() { dump(llvm::dbgs()); });
+  }
+
+  /// Returns true if every dimension has a zero offset.
+  bool hasNormalizedOffsets() {
+    for (std::size_t i = 0; i < Dims.size(); i++) {
+      if (Dims[i].Offset != 0) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /// Advances RawMemoryAddr by the combined offset of all dimensions and
+  /// resets the offsets to zero. Requires normalized strides.
+  void normalizeOffsets() {
+    FUNC_LOGGER();
+    ODBG_IF([&]() { dump(llvm::dbgs()); });
+    assert(hasNormalizedStrides());
+
+    if (hasNormalizedOffsets()) {
+      ODBG(ADT_Descriptor) << "No normalization needed.";
+      return;
+    }
+    ODBG(ADT_Descriptor) << "Descriptor needs normalization.";
+
+    int64_t baseHostPtrDeltaInBytes = 0;
+    for (auto &Dim : Dims) {
+      if (Dim.Offset != 0) {
+        baseHostPtrDeltaInBytes += Dim.Offset * Dim.Stride;
+        Dim.Offset = 0;
+      }
+    }
+
+    // The delta was accumulated in elements; convert it to bytes.
+    baseHostPtrDeltaInBytes *= ElementSize;
+
+    RawMemoryAddr =
+        reinterpret_cast<char *>(RawMemoryAddr) + baseHostPtrDeltaInBytes;
+
+    ODBG(ADT_Descriptor) << "Normalized:";
+    ODBG_IF([&]() { dump(llvm::dbgs()); });
+  }
+
+  /// Asserts that the element size is positive and that the dimensions are
+  /// sorted by strictly increasing stride.
+  void verify() {
+    assert(ElementSize > 0);
+    // Compare each dimension with its predecessor. Starting at 1 keeps the
+    // loop empty for rank-0 and rank-1 arrays; looping up to `size() - 1`
+    // would wrap around for an empty `Dims`.
+    for (std::size_t I = 1; I < Dims.size(); I++) {
+      assert(Dims[I - 1].Stride < Dims[I].Stride &&
+             "Expected dimensions to be sorted");
+    }
+  }
+
+  /// Writes all dimensions and the scalar members to \p OS.
+  void dump(llvm::raw_ostream &OS) {
+    OS << "ArrayInfo:\n";
+    for (unsigned I = 0; I < Dims.size(); I++) {
+      // clang-format off
+      OS << "      Dim " << I
+        << "\t" << STR_AND_VAL(Dims[I].Offset)
+        << "\t" << STR_AND_VAL(Dims[I].Size)
+        << "\t" << STR_AND_VAL(Dims[I].Stride)
+        << "\t" << STR_AND_VAL(Dims[I].Extent)
+        << "\n";
+      // clang-format on
+    }
+    // clang-format off
+    OS << "    "
+        << " " << STR_AND_VAL(RawMemoryAddr)
+        << " " << OPT_STR_AND_VAL(RawMemorySize)
+        << " " << STR_AND_VAL(ElementSize)
+        << "\n";
+    // clang-format on
+  }
+
+  /// Builds the descriptor for a non-contiguous copy of this array: one entry
+  /// per dimension, from the last dimension to the first, with stride and
+  /// offset converted to bytes, followed by a final entry that covers the
+  /// ElementSize bytes of a single element. Returns std::nullopt if the size
+  /// of a dimension is negative (i.e. unknown). \p Loc is currently unused.
+  ///
+  /// See the llvm-project/offload/test/offloading/non_contiguous_update.cpp
+  /// test for examples.
+  std::optional<NonContigDescTy> generateNonContigCopyDesc(ident_t *Loc) {
+    NonContigDescTy CopyDesc;
+    CopyDesc.Dims.reserve(Dims.size() + 1);
+
+    for (int I = Dims.size() - 1; I >= 0; I--) {
+      auto const &Dim = Dims[I];
+      if (Dim.Size < 0) {
+        ODBG(ADT_Descriptor)
+            << "Dim size missing, cannot build copy descriptor";
+        return std::nullopt;
+      }
+      CopyDesc.Dims.push_back({});
+      auto &LastDim = CopyDesc.Dims.back();
+      LastDim.Count = Dim.Size;
+      LastDim.Stride = Dim.Stride * ElementSize;
+      LastDim.Offset = Dim.Offset * LastDim.Stride;
+    }
+
+    CopyDesc.Dims.push_back({});
+    auto &LastDim = CopyDesc.Dims.back();
+    LastDim.Count = ElementSize;
+    LastDim.Offset = 0;
+    LastDim.Stride = 1;
+
+    return CopyDesc;
+  }
+};
+
+/// Flat view of a memref-style descriptor: two pointers, an offset, the
+/// element size and `rank` entries in each of `sizes` and `strides` (the dump
+/// helper reads `rank` entries of both arrays).
+/// NOTE(review): the field names suggest the MLIR memref descriptor layout;
+/// units of `offset` and `strides` are not visible here - confirm.
+struct MaterializedMemRefDesc {
+  void *allocatedPtr;
+  void *alignedPtr;
+  uint64_t offset;
+  int64_t elementSize;
+  unsigned char rank;
+  const uint64_t *sizes;
+  const uint64_t *strides;
+};
+
+/// Writes the scalar fields of \p Desc on one line to \p OS, followed by one
+/// line with size and stride for each of its `rank` dimensions.
+void dump(const MaterializedMemRefDesc &Desc, llvm::raw_ostream &OS) {
+  OS << " " << STR_AND_VAL(Desc.allocatedPtr);
+  OS << " " << STR_AND_VAL(Desc.alignedPtr);
+  OS << " " << STR_AND_VAL(Desc.offset);
+  OS << " " << STR_AND_VAL(Desc.elementSize);
+  OS << " " << STR_AND_VALI(Desc.rank);
+  OS << "\n";
+  // The index must stay named `I`: the macros print the argument spelling.
+  for (unsigned I = 0; I != Desc.rank; ++I) {
+    OS << "Dim " << I;
+    OS << " " << STR_AND_VAL(Desc.sizes[I]);
+    OS << " " << STR_AND_VAL(Desc.strides[I]);
+    OS << "\n";
+  }
+}
+
+/// Writes version, rank and element size of \p Desc on one line to \p OS,
+/// followed by one line with the bounds of each of its `Rank` dimensions.
+void dump(const AccDataDescOpenACC &Desc, llvm::raw_ostream &OS) {
+  OS << " " << STR_AND_VAL(Desc.Base.Version);
+  OS << " " << STR_AND_VALI(Desc.Rank);
+  OS << " " << STR_AND_VAL(Desc.ElementSize);
+  OS << "\n";
+  // The index must stay named `I`: the macros print the argument spelling.
+  for (unsigned I = 0; I != Desc.Rank; ++I) {
+    OS << "Dim " << I;
+    OS << " " << STR_AND_VAL(Desc.LowerBounds[I]);
+    OS << " " << STR_AND_VAL(Desc.UpperBounds[I]);
+    OS << " " << STR_AND_VAL(Desc.Extents[I]);
+    OS << " " << STR_AND_VAL(Desc.StridesInBytes[I]);
+    OS << " " << STR_AND_VAL(Desc.StartIndices[I]);
+    OS << "\n";
+  }
+}
+
+/// Combines several callables into one object whose call operator is the
+/// overload set of all of them (it inherits from every `Ts` and pulls in each
+/// `operator()`).
+template <class... Ts> struct overloads : Ts... {
+  using Ts::operator()...;
+};
+/// Deduction guide so that `overloads{f, g, ...}` deduces the callable types.
+template <class... Ts> overloads(Ts...) -> overloads<Ts...>;
+
+/// Returns a printable form of the async argument \p Async: "STREAM(n)" for
+/// non-negative values, the name of the special value for sync, default and
+/// noval, and "UNKNOWN" for everything else.
+std::string asyncToString(int64_t Async) {
+  const bool IsExplicitQueue = Async >= 0;
+  if (IsExplicitQueue)
+    return "STREAM(" + std::to_string(Async) + ")";
+
+  if (Async == AccAsyncSync)
+    return "SYNC";
+  if (Async == AccAsyncDefault)
+    return "DEFAULT";
+  if (Async == AccAsyncNoval)
+    return "NOVAL";
+
+  return "UNKNOWN";
+}
+
+/// Returns the names of all map type flags set in \p Type, separated by single
+/// spaces and in a fixed order, or "(none)" if no known flag is set. Note that
+/// the FINALIZE flag is printed as "DELETE".
+std::string mapTypeToString(int64_t Type) {
+  std::string Names;
+  // Appends `Name`, preceded by a separator unless it is the first name.
+  auto AppendIf = [&Names](bool IsSet, const char *Name) {
+    if (!IsSet)
+      return;
+    if (!Names.empty())
+      Names += ' ';
+    Names += Name;
+  };
+
+  AppendIf(Type & TGT_ACC_MAPTYPE_TO, "TO");
+  AppendIf(Type & TGT_ACC_MAPTYPE_FROM, "FROM");
+  AppendIf(Type & TGT_ACC_MAPTYPE_FINALIZE, "DELETE");
+  AppendIf(Type & TGT_ACC_MAPTYPE_PTR_AND_OBJ, "PTR_AND_OBJ");
+  AppendIf(Type & TGT_ACC_MAPTYPE_PRIVATE, "PRIVATE");
+  AppendIf(Type & TGT_ACC_MAPTYPE_LITERAL, "LITERAL");
+  AppendIf(Type & TGT_ACC_MAPTYPE_DEVPTR, "DEVPTR");
+  AppendIf(Type & TGT_ACC_MAPTYPE_MANAGED_DEVPTR, "MANAGED_DEVPTR");
+  AppendIf(Type & TGT_ACC_MAPTYPE_NO_CREATE, "NO_CREATE");
+  AppendIf(Type & TGT_ACC_MAPTYPE_GANG_PRIVATE, "GANG_PRIVATE");
+  AppendIf(Type & TGT_ACC_MAPTYPE_WORKER_PRIVATE, "WORKER_PRIVATE");
+  AppendIf(Type & TGT_ACC_MAPTYPE_VECTOR_PRIVATE, "VECTOR_PRIVATE");
+  AppendIf(Type & TGT_ACC_MAPTYPE_INIT_ZERO, "INIT_ZERO");
+  AppendIf(Type & TGT_ACC_MAPTYPE_DEVICE_RESIDENT, "DEVICE_RESIDENT");
+  AppendIf(Type & TGT_ACC_MAPTYPE_IF_PRESENT, "IF_PRESENT");
+
+  if (Names.empty())
+    return "(none)";
+  return Names;
+}
+
+/// When data is copied back to the host on data end: `Always` copies
+/// unconditionally, `OnDelete` only when the mapping is going to be deleted,
+/// `Never` not at all (see handleSingleDataEnd).
+enum class AccCopyOutType { Always, OnDelete, Never };
+/// Which reference count a data end operates on. handleSingleDataEnd passes
+/// `Structured` as the "hold modifier" of the mapping lookup.
+enum class AccRefCountingType { Dynamic, Structured };
+
+/// State handed from handleSingleDataEnd to the deferred post processing step
+/// (accPostProcessingTargetDataEnd) of one mapping.
+struct PostProcessingInfo {
+  /// The target pointer information.
+  TargetPointerResultTy TPR;
+  /// Size in bytes of the mapped data.
+  int64_t DataSize;
+  /// Whether the host shadow pointers of the entry have to be restored.
+  bool ShouldRestoreShadow;
+  /// Whether the mapping was selected for deletion.
+  bool ShouldDelete;
+};
+
+/// Collects the arguments of a kernel launch and the allocations that were
+/// made for it.
+struct KernelArgsMappingInfoTy {
+  AccKernelArgsTy &KernelArgs;
+
+  // Memory needed for launch
+  void addLaunchAlloc(void *Alloc) { LaunchAllocs.push_back(Alloc); }
+  SmallVector<void *> LaunchAllocs;
+
+  // Arguments
+  void addArg(void *Arg) { Args.push_back(Arg); }
+  SmallVector<void *> Args;
+  SmallVector<void *> Ptrs;
+
+  /// Builds the launch parameters from the collected arguments: the total
+  /// size in bytes, the argument array and an array with the address of each
+  /// argument. The result points into `Args` and `Ptrs`, so both must stay
+  /// untouched while it is in use. May only be called once (`Ptrs` must
+  /// still be empty).
+  KernelLaunchParamsTy getLaunchArgs() {
+    assert(Ptrs.empty());
+
+    if (Args.empty())
+      return KernelLaunchParamsTy{};
+
+    Ptrs.reserve(Args.size());
+    for (void *&Arg : Args)
+      Ptrs.push_back(&Arg);
+    return KernelLaunchParamsTy{sizeof(void *) * Args.size(), Args.data(),
+                                Ptrs.data()};
+  }
+};
+
+/// Deferred part of a data end operation for one mapping: restores the host
+/// shadow pointers if requested and, if this thread is still in charge of
+/// deleting the entry, removes it from the mapping table and frees the device
+/// memory.
+///
+/// \param Device the device the mapping belongs to.
+/// \param Info heap allocated state of the mapping; ownership is taken and
+/// the object is deleted before returning.
+/// \returns OFFLOAD_SUCCESS. A failed deallocation is reported through
+/// REPORT_FATAL().
+[[nodiscard]] int accPostProcessingTargetDataEnd(DeviceTy *Device,
+                                                 PostProcessingInfo *Info) {
+  // This will make sure we delete it when we exit the function.
+  std::unique_ptr<PostProcessingInfo> InfoDeleter(Info);
+
+  int Ret = OFFLOAD_SUCCESS;
+
+  assert(!Info->TPR.isHostPointer());
+
+  MappingInfoTy::HDTTMapAccessorTy HDTTMap =
+      Device->getMappingInfo().HostDataToTargetMap.getExclusiveAccessor();
+
+  // We cannot use a lock guard because we may end up deleting the mutex.
+  // We also explicitly unlocked the entry after it was put in the EntriesInfo
+  // so it can be reused.
+  Info->TPR.getEntry()->lock();
+  auto *Entry = Info->TPR.getEntry();
+
+  // TODO I do not understand why this is necessary - does the mapping
+  // automatically queue up entry deletion?
+  bool DelEntry = Info->ShouldDelete;
+  const bool IsNotLastUser = Entry->decDataEndThreadCount() != 0;
+  if (DelEntry && (Entry->getTotalRefCount() != 0 || IsNotLastUser)) {
+    ODBG(ADT_Mapping) << "IsNotLastUser";
+    // The thread is not in charge of deletion anymore. Give up access
+    // to the HDTT map and unset the deletion flag.
+    HDTTMap.destroy();
+    DelEntry = false;
+  }
+
+  if (Info->ShouldRestoreShadow) {
+    Entry->foreachShadowPointerInfo([&](const ShadowPtrInfoTy &ShadowPtr) {
+      ODBG(ADT_Mapping) << "Restoring host shadow "
+                        << (void *)ShadowPtr.HstPtrAddr
+                        << " to its original content (" << ShadowPtr.PtrSize
+                        << " bytes)";
+      std::memcpy(ShadowPtr.HstPtrAddr, ShadowPtr.HstPtrContent.data(),
+                  ShadowPtr.PtrSize);
+      return OFFLOAD_SUCCESS;
+    });
+  }
+
+  // Give up the lock as we either don't need it anymore (e.g., done with
+  // TPR), or erase TPR.
+  Info->TPR.setEntry(nullptr);
+
+  // Use the updated flag rather than Info->ShouldDelete: if deletion was
+  // handed over above, the map accessor is already destroyed and the entry is
+  // still in use, so it must not be erased here.
+  if (!DelEntry)
+    return Ret;
+
+  Ret = Device->getMappingInfo().eraseMapEntry(HDTTMap, Entry, Info->DataSize);
+  // Entry is already removed from the map, we can unlock it now.
+  HDTTMap.destroy();
+  Ret |= Device->getMappingInfo().deallocTgtPtrAndEntry(Entry, Info->DataSize);
+  if (Ret != OFFLOAD_SUCCESS)
+    REPORT_FATAL() << "Deallocating data from device failed.";
+
+  return OFFLOAD_SUCCESS;
+}
+
+/// Ends the mapping of one host object: looks the mapping up (updating its
+/// reference count), copies the data back to the host if requested and queues
+/// the shadow pointer restoration and deletion as a post processing function
+/// on \p AsyncInfo.
+///
+/// \tparam SizeTy `int64_t` for a contiguous object of that many bytes, or
+/// `NonContigDescTy &` for a non-contiguous object described by a copy
+/// descriptor. Any other type is rejected at compile time.
+/// \param Loc source location used in diagnostics.
+/// \param ArgBasePtr unused here.
+/// \param ArgPtr host address of the object.
+/// \param ArgSize size or copy descriptor, see \p SizeTy.
+/// \param ForceDelete delete the mapping even if it is not the last
+/// reference.
+/// \param IsNoCreate a missing mapping is not an error.
+/// \param CopyType when to copy the data back to the host.
+/// \param MapType which reference count is updated.
+/// \param AsyncInfo queue the transfers and the post processing are issued on.
+/// \param Device device that holds the mapping.
+template <typename SizeTy>
+void handleSingleDataEnd(ident_t *Loc, void *ArgBasePtr, void *ArgPtr,
+                         SizeTy ArgSize, bool ForceDelete, bool IsNoCreate,
+                         AccCopyOutType CopyType, AccRefCountingType MapType,
+                         AsyncInfoTy &AsyncInfo, DeviceTy &Device) {
+  // The condition depends on the template parameter, so it is only evaluated
+  // on instantiation. A plain `static_assert(false)` in a discarded
+  // `if constexpr` branch is ill-formed before C++23.
+  constexpr bool IsContiguous = std::is_same<SizeTy, int64_t>::value;
+  constexpr bool IsNonContiguous =
+      std::is_same<SizeTy, NonContigDescTy &>::value;
+  static_assert(IsContiguous || IsNonContiguous,
+                "SizeTy must be int64_t or NonContigDescTy &");
+
+  int64_t DataSize;
+  if constexpr (IsContiguous) {
+    DataSize = ArgSize;
+  } else {
+    DataSize = ArgSize.getAllocSize();
+  }
+
+  FUNC_LOGGER();
+  TargetPointerResultTy TPR = Device.getMappingInfo().getTgtPtrBegin(
+      ArgPtr, DataSize, /*UpdateRefCount=*/true,
+      /*HasHoldModifier=*/MapType == AccRefCountingType::Structured, IsNoCreate,
+      ForceDelete,
+      /*FromDataEnd=*/true);
+  if (!TPR.isPresent()) {
+    ODBG(ADT_Mapping) << "Mapping does not exist: "
+                      << (IsNoCreate ? "is no_create" : "error");
+    if (Pedantic && !IsNoCreate)
+      REPORT_FATAL() << "Device mapping does not exist at " << Loc;
+    return;
+  }
+
+  void *HstPtrBegin = ArgPtr;
+  void *TgtPtrBegin = TPR.TargetPointer;
+  ODBG(ADT_Mapping) << "There are " << DataSize
+                    << " bytes allocated at target address " << TgtPtrBegin
+                    << " - is" << (TPR.Flags.IsLast ? "" : " not") << " last";
+
+  bool ShouldDelete = ForceDelete || TPR.Flags.IsLast;
+  bool ShouldCopyOut = CopyType == AccCopyOutType::Always ||
+                       (CopyType == AccCopyOutType::OnDelete && ShouldDelete);
+  if (ShouldCopyOut) {
+    ODBG(ADT_Mapping) << "Moving " << DataSize << " bytes (tgt:" << TgtPtrBegin
+                      << ") -> (hst:" << HstPtrBegin << ")";
+    int Ret;
+    if constexpr (IsContiguous) {
+      Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize, AsyncInfo,
+                                TPR.getEntry());
+    } else {
+      Ret = Device.retrieveNonContigData(HstPtrBegin, TgtPtrBegin, ArgSize,
+                                         AsyncInfo, TPR.getEntry());
+    }
+    if (Ret != OFFLOAD_SUCCESS)
+      REPORT_FATAL() << "Failed to transfer data from device at " << Loc;
+  }
+
+  ODBG(ADT_Mapping) << "Queueing up post processing";
+
+  // TODO We may want to have a more intricate system for queueing up post
+  // processing. In OpenACC, we could potentially queue up a lot of stream
+  // operations before syncing, and we only execute these post processing
+  // functions after we sync. This would leave a lot of deallocation and
+  // unmapping queued for post processing but never happening because the
+  // operations in the stream are continuing to execute while we add more post
+  // processing functions which we never execute.
+  //
+  // One option here is to use Device.enqueueHostCall, however, for example, for
+  // CUDA, having cuFree's etc in a function executed in the stream is not
+  // supported (the context in that thread is invalid for calling cuda
+  // functions). Instead, we can have the host call "notify" that we can execute
+  // specific post processing functions, and we execute them at some point
+  // during execution on the normal threads.
+  //
+  // The shadow pointers only need to be restored if data was copied back,
+  // hence ShouldCopyOut initializes ShouldRestoreShadow.
+  auto *PostProcessingPtr = new PostProcessingInfo{std::move(TPR), DataSize,
+                                                   ShouldCopyOut, ShouldDelete};
+  // Release the entry until the post processing runs; it is locked again
+  // there.
+  PostProcessingPtr->TPR.getEntry()->unlock();
+  AsyncInfo.addPostProcessingFunction([=, Device = &Device]() -> int {
+    return accPostProcessingTargetDataEnd(Device, PostProcessingPtr);
+  });
+}
+
+/// Mapping information for a descriptor together with, optionally, the
+/// mapping information of the raw memory it refers to.
+struct DescAndMemMappingInfoTy {
+  DescMappingInfoTy Desc;
+  std::optional<MemMappingInfoTy> Memory;
+};
+
+struct ArgDescriptorsTy {
+
+  const Fortran::runtime::Descriptor *Flang = nullptr;
+  std::optional<MaterializedMemRefDesc> MemRef = std::nullopt;
+  const AccDataDescOpenACC *Acc = nullptr;
+
+  /// Returns true if none of the three descriptor kinds is present.
+  bool isNone() { return !Flang && !MemRef && !Acc; }
+
+  /// Asserts that at least one descriptor is present and that a Flang and a
+  /// MemRef descriptor are not present at the same time.
+  void verify() {
+    assert(!(Flang && MemRef));
+    assert(!isNone());
+  }
+
+  /// Writes each of the three descriptors, or "(nil)" for a missing one, to
+  /// \p OS. Note that the Flang descriptor is an exception: it is dumped to
+  /// `stderr` instead of \p OS.
+  void dump(llvm::raw_ostream &OS) {
+    OS << "ArgDescriptorsTy:\n";
+    OS << "Flang:\n";
+    if (Flang) {
+      // TODO can we use OS somehow?
+      Flang->Dump(stderr);
+    } else {
+      OS << "(nil)\n";
+    }
+    OS << "MemRef:\n";
+    if (MemRef) {
+      ::dump(*MemRef, OS);
+    } else {
+      OS << "(nil)\n";
+    }
+    OS << "Acc:\n";
+    if (Acc) {
+      ::dump(*Acc, OS);
+    } else {
+      OS << "(nil)\n";
+    }
+  }
+
+  // Appends one entry per dimension of the OpenACC bounds descriptor to
+  // AI.Dims. Offset comes from the lower bound, Size from the
+  // lower/upper-bound range, Extent from the descriptor's extents, and Stride
+  // is the byte stride expressed in units of AI.ElementSize.
+  //
+  // AI.ElementSize must be positive on entry. It may be reduced when the
+  // stride of dimension 0 is not a multiple of it; a non-multiple stride in any
+  // other dimension is reported as fatal.
+  void collectAccBounds(ident_t *Loc, ArrayInfo &AI) {
+    assert(Acc);
+    if (AI.ElementSize <= 0)
+      REPORT_FATAL() << Loc << "Invalid element size";
+
+    AI.Dims.reserve(Acc->Rank);
+    for (std::size_t DimIdx = 0; DimIdx < Acc->Rank; ++DimIdx) {
+      AI.Dims.push_back({});
+      auto &Entry = AI.Dims.back();
+      const auto ByteStride = Acc->StridesInBytes[DimIdx];
+
+      // Number of (reduced) elements that make up one original element.
+      long Scale = 1;
+      if (ByteStride % AI.ElementSize != 0) {
+        if (DimIdx != 0) {
+          REPORT_FATAL() << Loc << "Invalid array stride";
+        } else {
+          // `stride` in AccArrayDim is meant to be multiplied by elementsize.
+          // But the stride of a sliced descriptor array might not be divisible
+          // by the current element size. So, reduce elementsize.
+          Scale = AI.ElementSize / std::gcd(AI.ElementSize, ByteStride);
+          AI.ElementSize /= Scale;
+        }
+      }
+
+      Entry.Offset = Acc->LowerBounds[DimIdx];
+      Entry.Stride = ByteStride / AI.ElementSize;
+      Entry.Size =
+          Scale * (Acc->UpperBounds[DimIdx] - Acc->LowerBounds[DimIdx] + 1);
+      Entry.Extent = Acc->Extents[DimIdx];
+    }
+  }
+
+  // Appends one entry per dimension of the Flang descriptor to AI.Dims.
+  // Offset is always 0, Size is the descriptor extent and Stride is the
+  // descriptor byte stride expressed in units of AI.ElementSize.
+  //
+  // AI.ElementSize is overwritten with the element size stored in the
+  // descriptor. It may then be reduced when the byte stride of dimension 0 is
+  // not a multiple of it; a non-multiple stride in any other dimension is
+  // reported as fatal.
+  //
+  // NOTE(review): unlike collectAccBounds(), the Extent field of the dimension
+  // entries is not written here - confirm that this is intended.
+  void collectFlangBounds(ident_t *Loc, ArrayInfo &AI) {
+    assert(Flang);
+    // Take the element size from the descriptor first and validate *that*
+    // value: it is the one used as divisor/modulus below, so checking whatever
+    // the caller left in AI would not protect against a division by zero.
+    AI.ElementSize = Flang->ElementBytes();
+    if (AI.ElementSize <= 0) {
+      REPORT_FATAL() << Loc << "Invalid element size";
+    }
+
+    AI.Dims.reserve(Flang->rank());
+    for (int I = 0; I < Flang->rank(); I++) {
+      AI.Dims.push_back({});
+      auto &ThisDim = AI.Dims.back();
+      auto &FlangDim = Flang->GetDimension(I);
+      // Number of (reduced) elements that make up one original element.
+      long SizeFactor = 1;
+      if (FlangDim.ByteStride() % AI.ElementSize != 0) {
+        if (I == 0) {
+          // The stride of a sliced array might not be divisible by the element
+          // size; shrink the element size so that it divides the stride and
+          // scale the dimension size accordingly.
+          SizeFactor =
+              AI.ElementSize / std::gcd(AI.ElementSize, FlangDim.ByteStride());
+          AI.ElementSize /= SizeFactor;
+        } else {
+          REPORT_FATAL() << Loc << "Invalid array stride";
+        }
+      }
+      ThisDim.Offset = 0;
+      ThisDim.Stride = FlangDim.ByteStride() / AI.ElementSize;
+      ThisDim.Size = SizeFactor * FlangDim.Extent();
+    }
+  }
+
+  using LiteralArg = void *;
+  using ArgMappingInfoTy =
+      std::variant<DescAndMemMappingInfoTy, MemMappingInfoTy, LiteralArg>;
+  using ArgMappingInfosTy = std::vector<ArgMappingInfoTy>;
+
+  // Translates the attached descriptors into the list of mappings / kernel
+  // arguments they stand for:
+  //  - Flang descriptor: a single descriptor-plus-memory entry. The memory
+  //    part is absent when the descriptor is not allocated. Bounds come from
+  //    the OpenACC descriptor if one is attached, otherwise from the Flang
+  //    descriptor itself.
+  //  - MemRef descriptor: two raw memory entries (allocated and aligned
+  //    pointer) followed by literal entries for the offset and for the
+  //    size/stride of each dimension. Combining it with OpenACC bounds is
+  //    reported as fatal.
+  //  - OpenACC bounds only: a single raw memory entry computed relative to Ptr.
+  // Ptr is only used in the last case.
+  ArgMappingInfosTy getMappingInfos(ident_t *Loc, void *Ptr) {
+    ArgMappingInfosTy Result;
+
+    if (Flang) {
+      DescMappingInfoTy DescInfo{Flang->SizeInBytes(),
+                                 offsetof(CFI_cdesc_t, base_addr)};
+      if (!Flang->IsAllocated()) {
+        ODBG() << "Is not allocated - nothing to map.";
+        Result.emplace_back(DescAndMemMappingInfoTy{DescInfo, std::nullopt});
+        return Result;
+      }
+
+      ArrayInfo AI;
+      AI.ElementSize = Flang->ElementBytes();
+      // Fall back to the OpenACC element size when the descriptor has none.
+      if (Acc && AI.ElementSize == 0)
+        AI.ElementSize = Acc->ElementSize;
+      AI.setPtr(Flang->OffsetElement(0));
+
+      if (Acc)
+        collectAccBounds(Loc, AI);
+      else
+        collectFlangBounds(Loc, AI);
+      AI.normalize();
+      AI.computeSizeFromDims(Loc);
+
+      MemMappingInfoTy MemInfo{};
+      MemInfo.RawMemoryPtr = AI.RawMemoryAddr;
+      MemInfo.RawMemoryBasePtr = Flang->OffsetElement(0);
+      MemInfo.RawMemorySize = AI.RawMemorySize;
+      MemInfo.CopyDesc = AI.generateNonContigCopyDesc(Loc);
+
+      Result.emplace_back(
+          DescAndMemMappingInfoTy{DescInfo, std::move(MemInfo)});
+      return Result;
+    }
+
+    if (MemRef) {
+      if (Acc) {
+        REPORT_FATAL() << Loc << "Unsupported: MemRef with OpenACC bounds";
+      }
+
+      // Walk the dimensions from innermost to outermost and require each
+      // stride to equal the product of the sizes of all inner dimensions;
+      // afterwards Extent holds the total number of elements.
+      uint64_t Extent = 1LL;
+      for (ssize_t Dim = (ssize_t)MemRef->rank - 1; Dim >= 0; Dim--) {
+        Extent *= MemRef->sizes[Dim];
+        if (Extent != MemRef->strides[Dim] * MemRef->sizes[Dim]) {
+          REPORT_FATAL() << Loc << "Invalid memref descriptor";
+        }
+      }
+      // Convert the element count into a size in bytes.
+      Extent *= MemRef->elementSize;
+
+      // Both pointers are mapped with the allocated pointer as their base.
+      auto PushRawMapping = [&](auto RawPtr) {
+        MemMappingInfoTy MemInfo{};
+        MemInfo.RawMemoryPtr = RawPtr;
+        MemInfo.RawMemoryBasePtr = MemRef->allocatedPtr;
+        MemInfo.RawMemorySize = Extent;
+        Result.push_back(std::move(MemInfo));
+      };
+      PushRawMapping(MemRef->allocatedPtr);
+      PushRawMapping(MemRef->alignedPtr);
+
+      // The remaining descriptor fields are passed by value.
+      Result.push_back(reinterpret_cast<void *>(MemRef->offset));
+      for (size_t Dim = 0; Dim < MemRef->rank; Dim++) {
+        Result.push_back(reinterpret_cast<void *>(MemRef->sizes[Dim]));
+        Result.push_back(reinterpret_cast<void *>(MemRef->strides[Dim]));
+      }
+      return Result;
+    }
+
+    if (Acc) {
+      ArrayInfo AI;
+      AI.ElementSize = Acc->ElementSize;
+      AI.setPtr(Ptr);
+      collectAccBounds(Loc, AI);
+      AI.normalize();
+      AI.computeSizeFromDims(Loc);
+
+      MemMappingInfoTy MemInfo{};
+      MemInfo.RawMemoryPtr = AI.RawMemoryAddr;
+      MemInfo.RawMemoryBasePtr = Ptr;
+      MemInfo.RawMemorySize = AI.RawMemorySize;
+      MemInfo.CopyDesc = AI.generateNonContigCopyDesc(Loc);
+
+      Result.push_back(std::move(MemInfo));
+      return Result;
+    }
+
+    REPORT_FATAL() << Loc << "Unknown case.";
+    abort();
+  }
+
+  // Creates the device copy of a private kernel argument that is described
+  // by OpenACC bounds: the section selected by the bounds is allocated on the
+  // device, filled from the host, passed to the kernel and registered as a
+  // launch allocation. A Flang descriptor is not supported yet and any other
+  // descriptor combination is reported as fatal.
+  // ArgSize and HasFlagTo are currently not used by this function.
+  void dataBeginPrivate(ident_t *Loc, void *ArgPtr, int64_t ArgSize,
+                        bool HasFlagTo, DeviceTy &Device,
+                        AsyncInfoTy &AsyncInfo,
+                        MappingInfoTy::HDTTMapAccessorTy &HDTTMap,
+                        KernelArgsMappingInfoTy &KI) {
+    assert(!MemRef);
+
+    if (Flang) {
+      REPORT_FATAL() << "TODO Flang descriptor on private variable";
+      return;
+    }
+    if (!Acc) {
+      REPORT_FATAL() << Loc << "Unknown descriptor type for private variable";
+      return;
+    }
+
+    assert(ArgPtr);
+    ArrayInfo AI;
+    AI.ElementSize = Acc->ElementSize;
+    AI.setPtr(ArgPtr);
+    collectAccBounds(Loc, AI);
+    AI.normalize();
+    AI.computeSizeFromDims(Loc);
+
+    assert(AI.RawMemorySize);
+    size_t DataSize = *AI.RawMemorySize;
+    void *HostData = AI.RawMemoryAddr;
+    // Distance in bytes from the argument base to the start of the section.
+    const intptr_t SectionAddr = reinterpret_cast<intptr_t>(HostData);
+    const intptr_t BaseAddr = reinterpret_cast<intptr_t>(ArgPtr);
+    ptrdiff_t Offset = SectionAddr - BaseAddr;
+
+    ODBG(ADT_Interface) << "ACC firstprivate (partial): dataSize=" << DataSize
+                        << " hostData=" << HostData << " (base=" << ArgPtr
+                        << ")";
+
+    void *PrivateMemory =
+        Device.allocData(DataSize, nullptr, TARGET_ALLOC_DEVICE);
+    Device.submitData(PrivateMemory, HostData, DataSize, AsyncInfo,
+                      /*Entry=*/nullptr, &HDTTMap);
+    // The kernel receives the device address that corresponds to ArgPtr, i.e.
+    // the start of the device section moved back by the section offset.
+    KI.addArg(static_cast<char *>(PrivateMemory) - Offset);
+    KI.addLaunchAlloc(PrivateMemory);
+  }
+
+  // Handles a device-pointer argument that comes with a Flang descriptor:
+  // the descriptor bytes are copied into a fresh device allocation, which is
+  // passed as the kernel argument and registered as a launch allocation.
+  // Loc is currently not used by this function.
+  void dataBeginDevPtr(ident_t *Loc, DeviceTy &Device, AsyncInfoTy &AsyncInfo,
+                       MappingInfoTy::HDTTMapAccessorTy &HDTTMap,
+                       KernelArgsMappingInfoTy &KI) {
+    assert(Flang && !MemRef);
+    const size_t NumBytes = Flang->SizeInBytes();
+    // submitData takes a non-const host pointer.
+    void *HostDesc = const_cast<void *>(static_cast<const void *>(Flang));
+    void *DeviceCopy = Device.allocData(NumBytes, nullptr, TARGET_ALLOC_DEVICE);
+    Device.submitData(DeviceCopy, HostDesc, NumBytes, AsyncInfo,
+                      /*Entry=*/nullptr, &HDTTMap);
+    KI.addArg(DeviceCopy);
+    KI.addLaunchAlloc(DeviceCopy);
+  }
+
+  // Maps this argument onto the device, based on the mapping list produced by
+  // getMappingInfos().
+  //
+  // Every mapping is registered through MappingInfoTy::getTargetPointer(); a
+  // structured MapType sets the hold modifier. When KI is non-null, the
+  // resulting device address - or the host address for a no_create argument
+  // that is not present - is appended as a kernel argument. For
+  // descriptor-backed arguments the descriptor is always copied to the device
+  // and its data pointer field on the device is redirected to the device copy
+  // of the data.
+  //
+  // ParentAllocation is an in/out parameter: if it is null when a descriptor
+  // mapping is processed, it is set to DescriptorAddr.
+  // IsPtrAndObj is currently not used by this function.
+  void dataBegin(ident_t *Loc, void *ArgPtr, void *DescriptorAddr,
+                 void *&ParentAllocation, bool IsPtrAndObj, char *ArgName,
+                 bool HasFlagTo, bool IsNoCreate, AccRefCountingType MapType,
+                 AsyncInfoTy &AsyncInfo, DeviceTy &Device,
+                 MappingInfoTy::HDTTMapAccessorTy &HDTTMap,
+                 KernelArgsMappingInfoTy *KI) {
+    // Appends a kernel argument (only if KI is set): the device address when
+    // the data is present, otherwise the host address, which is only expected
+    // for no_create.
+    auto AddArg = [&](TargetPointerResultTy &TPR, void *TgtArg, void *HstArg) {
+      if (KI) {
+        if (TPR.isPresent()) {
+          KI->addArg(TgtArg);
+        } else {
+          assert(IsNoCreate);
+          KI->addArg(HstArg);
+        }
+      }
+    };
+    // Maps a region that has a non-contiguous copy descriptor and returns its
+    // device address. With IsParam set, the device address moved back by the
+    // base delta is also passed as a kernel argument.
+    auto MapWithDesc = [&](MemMappingInfoTy &MemInfo, void *BasePtr,
+                           bool IsParam) -> void * {
+      assert(MemInfo.CopyDesc);
+      ODBG() << "Will use non-contig copy.";
+
+      TargetPointerResultTy TPR = Device.getMappingInfo().getTargetPointer(
+          HDTTMap, MemInfo.RawMemoryPtr, MemInfo.RawMemoryBasePtr, 0,
+          &*MemInfo.CopyDesc, ArgName, HasFlagTo,
+          /*HasFlagAlways=*/false, /*IsImplicit=*/false,
+          /*UpdateRefCount=*/true, /*HasCloseModifier=*/false,
+          /*HasPresentModifier=*/false,
+          /*HasHoldModifier=*/MapType == AccRefCountingType::Structured,
+          IsNoCreate, AsyncInfo, /*OwnedTPR=*/nullptr,
+          /*ReleaseHDTTMap=*/false);
+      if (IsParam)
+        AddArg(TPR,
+               reinterpret_cast<void *>(
+                   (reinterpret_cast<intptr_t>(TPR.TargetPointer) -
+                    MemInfo.getBaseDelta())),
+               MemInfo.RawMemoryPtr);
+      return TPR.TargetPointer;
+    };
+
+    auto MapInfos = getMappingInfos(Loc, ArgPtr);
+    // Descriptor plus (optional) data: map both and attach the device data
+    // pointer to the device copy of the descriptor.
+    auto DescAndMemCase = [&](DescAndMemMappingInfoTy &MapInfo) {
+      ODBG() << "Mapping desc and mem";
+      auto &DescInfo = MapInfo.Desc;
+
+      void *DescTgtPtr = nullptr;
+      if (!ParentAllocation)
+        ParentAllocation = DescriptorAddr;
+
+      {
+        // Always copy the descriptor to device. It is needed regardless of the
+        // user-specified TO/FROM, and regardless of whether no_create is on or
+        // not as the no_create can refer to the raw memory in the descriptor.
+        TargetPointerResultTy TPR = Device.getMappingInfo().getTargetPointer(
+            HDTTMap, DescriptorAddr, DescriptorAddr, 0,
+            (int64_t)DescInfo.DescriptorSize, ArgName, /*HasFlagTo=*/true,
+            /*HasFlagAlways=*/false, /*IsImplicit=*/false,
+            /*UpdateRefCount=*/true, /*HasCloseModifier=*/false,
+            /*HasPresentModifier=*/false,
+            /*HasHoldModifier=*/MapType == AccRefCountingType::Structured,
+            /*IsNoCreate=*/false, AsyncInfo, /*OwnedTPR=*/nullptr,
+            /*ReleaseHDTTMap=*/false);
+        DescTgtPtr = TPR.TargetPointer;
+        AddArg(TPR, DescTgtPtr, DescriptorAddr);
+      }
+
+      void *MemTgtPtr = nullptr;
+      if (MapInfo.Memory) {
+        auto &MemInfo = *MapInfo.Memory;
+        // Host address of the data pointer field inside the descriptor.
+        void *BasePtr =
+            static_cast<char *>(DescriptorAddr) + DescInfo.RawMemoryPtrOffset;
+        if (MemInfo.RawMemorySize) {
+          if (MemInfo.CopyDesc) {
+            MemTgtPtr = MapWithDesc(MemInfo, BasePtr, false);
+          } else {
+            TargetPointerResultTy TPR =
+                Device.getMappingInfo().getTargetPointer(
+                    HDTTMap, MemInfo.RawMemoryPtr, BasePtr, 0,
+                    (int64_t)*MemInfo.RawMemorySize, ArgName, HasFlagTo,
+                    /*HasFlagAlways=*/false, /*IsImplicit=*/false,
+                    /*UpdateRefCount=*/true, /*HasCloseModifier=*/false,
+                    /*HasPresentModifier=*/false,
+                    /*HasHoldModifier=*/MapType ==
+                        AccRefCountingType::Structured,
+                    IsNoCreate, AsyncInfo, /*OwnedTPR=*/nullptr,
+                    /*ReleaseHDTTMap=*/false);
+            MemTgtPtr = TPR.TargetPointer;
+          }
+        } else {
+          // No size is known (RawMemorySize is empty, so it must not be
+          // dereferenced): look the data up with size 0 and the present
+          // modifier, the same size dataEnd() uses for this case.
+          TargetPointerResultTy TPR = Device.getMappingInfo().getTargetPointer(
+              HDTTMap, MemInfo.RawMemoryPtr, BasePtr, 0, (int64_t)0, ArgName,
+              HasFlagTo,
+              /*HasFlagAlways=*/false, /*IsImplicit=*/false,
+              /*UpdateRefCount=*/true, /*HasCloseModifier=*/false,
+              /*HasPresentModifier=*/true,
+              /*HasHoldModifier=*/MapType == AccRefCountingType::Structured,
+              IsNoCreate, AsyncInfo, /*OwnedTPR=*/nullptr,
+              /*ReleaseHDTTMap=*/false);
+          MemTgtPtr = TPR.TargetPointer;
+        }
+
+        if (MemTgtPtr && DescTgtPtr) {
+          LookupResult DescLR = Device.getMappingInfo().lookupMapping(
+              HDTTMap, DescriptorAddr, DescInfo.DescriptorSize);
+          auto *DescEntry = DescLR.TPR.getEntry();
+          if (DescEntry) {
+            // Device address of the descriptor, derived from the entry that
+            // contains it.
+            uintptr_t TgtDescBase =
+                DescEntry->TgtPtrBegin +
+                (reinterpret_cast<uintptr_t>(DescriptorAddr) -
+                 DescEntry->HstPtrBegin);
+            // Data pointer field of the descriptor on the device and on the
+            // host.
+            void **TgtPtrAddr = reinterpret_cast<void **>(
+                TgtDescBase + DescInfo.RawMemoryPtrOffset);
+            void **HstPtrAddr = reinterpret_cast<void **>(
+                reinterpret_cast<uintptr_t>(DescriptorAddr) +
+                DescInfo.RawMemoryPtrOffset);
+
+            // Device address that corresponds to the host base address stored
+            // in the descriptor: the mapped region may start at a different
+            // host address than that base, so apply the same distance on the
+            // device side.
+            void *HstBaseAddr = *HstPtrAddr;
+            void *TgtPteeBase = reinterpret_cast<void *>(
+                reinterpret_cast<uintptr_t>(MemTgtPtr) +
+                (reinterpret_cast<uintptr_t>(HstBaseAddr) -
+                 reinterpret_cast<uintptr_t>(MemInfo.RawMemoryPtr)));
+
+            // Only write the device field when addShadowPointer() reports
+            // that the shadow pointer was added.
+            if (DescEntry->addShadowPointer(
+                    ShadowPtrInfoTy{HstPtrAddr, TgtPtrAddr, TgtPteeBase,
+                                    static_cast<int64_t>(sizeof(void *))})) {
+              ODBG() << "DescAndMemCase attach: device field " << TgtPtrAddr
+                     << " -> " << TgtPteeBase;
+              // The value to copy is stored in a location provided by
+              // AsyncInfo rather than in a local variable.
+              void *&Buf = AsyncInfo.getVoidPtrLocation();
+              Buf = TgtPteeBase;
+              Device.submitData(TgtPtrAddr, &Buf, sizeof(void *), AsyncInfo,
+                                DescEntry, &HDTTMap);
+              DescEntry->addEventIfNecessary(Device, AsyncInfo);
+            }
+          }
+        }
+      }
+    };
+    // Plain memory: map it and pass the device address as kernel argument.
+    auto MemCase = [&](MemMappingInfoTy &MemInfo) {
+      if (MemInfo.RawMemorySize) {
+        if (MemInfo.CopyDesc) {
+          MapWithDesc(MemInfo, MemInfo.RawMemoryBasePtr, true);
+        } else {
+          TargetPointerResultTy TPR = Device.getMappingInfo().getTargetPointer(
+              HDTTMap, MemInfo.RawMemoryPtr, MemInfo.RawMemoryBasePtr, 0,
+              (int64_t)*MemInfo.RawMemorySize, ArgName, HasFlagTo,
+              /*HasFlagAlways=*/false, /*IsImplicit=*/false,
+              /*UpdateRefCount=*/true, /*HasCloseModifier=*/false,
+              /*HasPresentModifier=*/false,
+              /*HasHoldModifier=*/MapType == AccRefCountingType::Structured,
+              IsNoCreate, AsyncInfo, /*OwnedTPR=*/nullptr,
+              /*ReleaseHDTTMap=*/false);
+          AddArg(TPR, TPR.TargetPointer, MemInfo.RawMemoryPtr);
+        }
+      } else {
+        // No size is known (RawMemorySize is empty, so it must not be
+        // dereferenced): look the data up with size 0 and the present
+        // modifier, the same size dataEnd() uses for this case.
+        TargetPointerResultTy TPR = Device.getMappingInfo().getTargetPointer(
+            HDTTMap, MemInfo.RawMemoryPtr, MemInfo.RawMemoryBasePtr, 0,
+            (int64_t)0, ArgName, HasFlagTo,
+            /*HasFlagAlways=*/false, /*IsImplicit=*/false,
+            /*UpdateRefCount=*/true, /*HasCloseModifier=*/false,
+            /*HasPresentModifier=*/true,
+            /*HasHoldModifier=*/MapType == AccRefCountingType::Structured,
+            IsNoCreate, AsyncInfo, /*OwnedTPR=*/nullptr,
+            /*ReleaseHDTTMap=*/false);
+        AddArg(TPR, TPR.TargetPointer, MemInfo.RawMemoryPtr);
+      }
+    };
+    // Literal values are forwarded unchanged as kernel arguments.
+    auto LiteralCase = [&](void *Literal) {
+      if (KI)
+        KI->addArg(Literal);
+    };
+    for (auto &MapInfo : MapInfos) {
+      std::visit(overloads{DescAndMemCase, MemCase, LiteralCase}, MapInfo);
+    }
+  }
+
+  // Ends the device mapping of this argument by calling handleSingleDataEnd()
+  // for every mapping produced by getMappingInfos(): first the descriptor
+  // itself (if any), then the data it refers to. Literal entries need no
+  // work. ArgType is currently not used by this function.
+  void dataEnd(ident_t *Loc, void *ArgPtr, void *DescriptorAddr,
+               void *ParentAllocation, int64_t ArgType, bool ForceDelete,
+               bool IsNoCreate, AccCopyOutType CopyType,
+               AccRefCountingType MapType, AsyncInfoTy &AsyncInfo,
+               DeviceTy &Device) {
+    // Ends the mapping of one raw memory region. Three cases:
+    //  - size unknown: the region is ended with size 0,
+    //  - contiguous: the region is ended with its byte size,
+    //  - non-contiguous: the region is ended with its copy descriptor.
+    auto EndMemory = [&](MemMappingInfoTy &MemInfo, void *BasePtr) {
+      if (!MemInfo.RawMemorySize) {
+        handleSingleDataEnd<int64_t>(Loc, BasePtr, MemInfo.RawMemoryPtr, 0,
+                                     ForceDelete, IsNoCreate, CopyType,
+                                     MapType, AsyncInfo, Device);
+        return;
+      }
+      if (!MemInfo.CopyDesc) {
+        handleSingleDataEnd<int64_t>(
+            Loc, BasePtr, MemInfo.RawMemoryPtr, *MemInfo.RawMemorySize,
+            ForceDelete, IsNoCreate, CopyType, MapType, AsyncInfo, Device);
+        return;
+      }
+      ODBG(ADT_Mapping) << "Will use non-contig copy.";
+      handleSingleDataEnd<NonContigDescTy &>(
+          Loc, BasePtr, MemInfo.RawMemoryPtr, *MemInfo.CopyDesc, ForceDelete,
+          IsNoCreate, CopyType, MapType, AsyncInfo, Device);
+    };
+
+    auto Mappings = getMappingInfos(Loc, ArgPtr);
+
+    auto OnDescAndMem = [&](DescAndMemMappingInfoTy &Info) {
+      if (!ParentAllocation)
+        ParentAllocation = DescriptorAddr;
+
+      // The descriptor object itself.
+      handleSingleDataEnd<int64_t>(
+          Loc, DescriptorAddr, DescriptorAddr, Info.Desc.DescriptorSize,
+          ForceDelete, IsNoCreate, CopyType, MapType, AsyncInfo, Device);
+
+      if (!Info.Memory)
+        return;
+      // The data; its base is the data pointer field inside the descriptor.
+      void *FieldAddr =
+          static_cast<char *>(DescriptorAddr) + Info.Desc.RawMemoryPtrOffset;
+      EndMemory(*Info.Memory, FieldAddr);
+    };
+    auto OnMem = [&](MemMappingInfoTy &MemInfo) {
+      EndMemory(MemInfo, MemInfo.RawMemoryBasePtr);
+    };
+    auto OnLiteral = [&](void *) {};
+
+    for (auto &Mapping : Mappings)
+      std::visit(overloads{OnDescAndMem, OnMem, OnLiteral}, Mapping);
+  }
+
+  // Implements a data update for this argument: for every memory mapping
+  // produced by getMappingInfos() that has a known size, data is copied to the
+  // device (TO flag) and/or back to the host (FROM flag). Reference counts
+  // are not changed. A missing device mapping is fatal unless the if_present
+  // flag is set, in which case the update is skipped.
+  void dataUpdate(ident_t *Loc, void *ArgPtr, int64_t ArgType,
+                  AsyncInfoTy &AsyncInfo, DeviceTy &Device) {
+    const bool ToDevice = ArgType & TGT_ACC_MAPTYPE_TO;
+    const bool FromDevice = ArgType & TGT_ACC_MAPTYPE_FROM;
+
+    // Looks up the mapping that must contain [HstPtr, HstPtr + Size).
+    auto FindMapping = [&](void *HstPtr,
+                           int64_t Size) -> TargetPointerResultTy {
+      TargetPointerResultTy Found = Device.getMappingInfo().getTgtPtrBegin(
+          HstPtr, Size, /*UpdateRefCount=*/false, /*UseHoldRefCount=*/false,
+          /*MustContain=*/true);
+      if (Found.isPresent())
+        return Found;
+      if (ArgType & TGT_ACC_MAPTYPE_IF_PRESENT) {
+        ODBG(ADT_Interface) << "Not present, if_present - skipping update.";
+        return Found;
+      }
+      REPORT_FATAL() << "Device mapping does not exist for update at " << Loc;
+      return Found;
+    };
+
+    // Updates a contiguous range of Size bytes starting at HstPtr.
+    auto UpdateContiguous = [&](void *HstPtr, int64_t Size) {
+      TargetPointerResultTy Found = FindMapping(HstPtr, Size);
+      if (!Found.isPresent())
+        return;
+      void *TgtPtr = Found.TargetPointer;
+      if (ToDevice) {
+        ODBG(ADT_Interface) << "Update TO: " << Size << " bytes hst:" << HstPtr
+                            << " -> tgt:" << TgtPtr;
+        Device.submitData(TgtPtr, HstPtr, Size, AsyncInfo, Found.getEntry());
+      }
+      if (FromDevice) {
+        ODBG(ADT_Interface) << "Update FROM: " << Size
+                            << " bytes tgt:" << TgtPtr << " -> hst:" << HstPtr;
+        Device.retrieveData(HstPtr, TgtPtr, Size, AsyncInfo, Found.getEntry());
+      }
+    };
+
+    // Updates a region described by a non-contiguous copy descriptor.
+    auto UpdateNonContig = [&](MemMappingInfoTy &MemInfo) {
+      ODBG(ADT_Interface) << "Will use non-contig update.";
+
+      int64_t AllocSize = MemInfo.CopyDesc->getAllocSize();
+      TargetPointerResultTy Found =
+          FindMapping(MemInfo.RawMemoryPtr, AllocSize);
+      if (!Found.isPresent())
+        return;
+      void *TgtPtr = Found.TargetPointer;
+      if (ToDevice) {
+        ODBG(ADT_Interface)
+            << "Non-contig update TO: hst:" << MemInfo.RawMemoryPtr
+            << " -> tgt:" << TgtPtr;
+        Device.submitNonContigData(TgtPtr, MemInfo.RawMemoryPtr,
+                                   *MemInfo.CopyDesc, AsyncInfo,
+                                   Found.getEntry());
+      }
+      if (FromDevice) {
+        ODBG(ADT_Interface) << "Non-contig update FROM: tgt:" << TgtPtr
+                            << " -> hst:" << MemInfo.RawMemoryPtr;
+        Device.retrieveNonContigData(MemInfo.RawMemoryPtr, TgtPtr,
+                                     *MemInfo.CopyDesc, AsyncInfo,
+                                     Found.getEntry());
+      }
+    };
+
+    // Regions without a known size are skipped.
+    auto UpdateMemory = [&](MemMappingInfoTy &MemInfo) {
+      if (!MemInfo.RawMemorySize)
+        return;
+      if (MemInfo.CopyDesc)
+        UpdateNonContig(MemInfo);
+      else
+        UpdateContiguous(MemInfo.RawMemoryPtr, *MemInfo.RawMemorySize);
+    };
+
+    auto Mappings = getMappingInfos(Loc, ArgPtr);
+    for (auto &Mapping : Mappings) {
+      if (auto *DescAndMem = std::get_if<DescAndMemMappingInfoTy>(&Mapping)) {
+        if (DescAndMem->Memory)
+          UpdateMemory(*DescAndMem->Memory);
+      } else if (auto *Mem = std::get_if<MemMappingInfoTy>(&Mapping)) {
+        UpdateMemory(*Mem);
+      }
+    }
+  }
+};
+
+// Returns a pointer to the first entry of the sizes stored in Desc.
+const uint64_t *getMemRefSizes(const MemRefDesc *Desc) {
+  const uint64_t *FirstSize = &Desc->sizes[0];
+  return FirstSize;
+}
+// Returns a pointer to the strides of Desc, which are taken to start Rank
+// entries after the first size.
+const uint64_t *getMemRefStrides(const MemRefDesc *Desc, unsigned Rank) {
+  const uint64_t *FirstSize = &Desc->sizes[0];
+  return FirstSize + Rank;
+}
+
+// Decodes the descriptor attached to an argument into an ArgDescriptorsTy.
+// The Version field is a set of flags selecting which descriptor kinds are
+// present (Flang/F18, MemRef, OpenACC bounds). A null ArgDesc yields an empty
+// result. OpenACC bounds combined with a MemRef descriptor are reported as
+// fatal.
+ArgDescriptorsTy parseArgDescs(ident_t *Loc, const AccDataDesc *ArgDesc) {
+  ArgDescriptorsTy Result;
+  if (ArgDesc == nullptr)
+    return Result;
+
+  if (ArgDesc->Version & TGT_ACC_DESC_F18) {
+    const auto *F18Arg = (const AccDataDescF18 *)ArgDesc;
+    Result.Flang =
+        reinterpret_cast<decltype(Result.Flang)>(F18Arg->FortranDescriptor);
+  }
+
+  if (ArgDesc->Version & TGT_ACC_DESC_MEMREF) {
+    const auto *MemRefArg = (const AccDataDescMemRef *)ArgDesc;
+    const MemRefDesc *Raw = MemRefArg->MemRefDescriptor;
+    Result.MemRef = MaterializedMemRefDesc{};
+    auto &Materialized = *Result.MemRef;
+    Materialized.allocatedPtr = Raw->allocatedPtr;
+    Materialized.alignedPtr = Raw->alignedPtr;
+    Materialized.offset = Raw->offset;
+    Materialized.rank = MemRefArg->Rank;
+    Materialized.sizes = getMemRefSizes(Raw);
+    Materialized.strides = getMemRefStrides(Raw, Materialized.rank);
+    Materialized.elementSize = MemRefArg->ElementSize;
+  }
+
+  if (ArgDesc->Version & TGT_ACC_DESC_OPENACC) {
+    // When a Flang descriptor is present, the OpenACC part starts one
+    // pointer-size past the start of ArgDesc; otherwise it starts at ArgDesc.
+    int64_t DescPadding = 0;
+    if (Result.Flang) {
+      DescPadding = sizeof(CFI_cdesc_t *);
+    } else if (Result.MemRef) {
+      REPORT_FATAL() << Loc << "Unsupported: MemRef with OpenACC bounds";
+    }
+    const char *RawBytes = reinterpret_cast<const char *>(ArgDesc);
+    Result.Acc =
+        reinterpret_cast<const AccDataDescOpenACC *>(RawBytes + DescPadding);
+  }
+  return Result;
+}
+
+// Parses ArgDesc with parseArgDescs(), dumps the result through ODBG_IF and
+// runs ArgDescriptorsTy::verify() on it before returning it.
+ArgDescriptorsTy parseAndVerifyArgDescs(ident_t *Loc,
+                                        const AccDataDesc *ArgDesc) {
+  ArgDescriptorsTy Parsed = parseArgDescs(Loc, ArgDesc);
+  ODBG_IF([&]() { Parsed.dump(llvm::dbgs()); });
+  Parsed.verify();
+  return Parsed;
+}
+
+// Processes one argument at the beginning of a data mapping. KI is non-null
+// when the argument belongs to a kernel launch; in that case a kernel
+// argument is recorded in KI for it.
+//
+// The flags in ArgType select how the argument is handled:
+//  - DEVPTR: ignored without KI. Without a descriptor the pointer-sized value
+//    stored at ArgPtr is passed as-is; with a descriptor the descriptor is
+//    copied to the device (see ArgDescriptorsTy::dataBeginDevPtr()).
+//  - PRIVATE: device memory is allocated for the private copy (one instance
+//    per gang/worker/vector lane as requested by the flags), optionally
+//    initialized from the host when the TO flag is set, and registered as a
+//    launch allocation.
+//  - LITERAL: the value stored at ArgPtr is passed directly if it is no wider
+//    than a pointer.
+//  - otherwise: the data is mapped. With a positive ArgSize the range
+//    [ArgPtr, ArgPtr + ArgSize) is mapped directly; otherwise the mapping is
+//    derived from ArgDesc (see ArgDescriptorsTy::dataBegin()).
+void accTargetDataBegin(ident_t *Loc, void *ArgBasePtr, void *ArgPtr,
+                        int64_t ArgSize, int64_t ArgType, char *ArgName,
+                        AccDataDesc *ArgDesc, AccRefCountingType MapType,
+                        AsyncInfoTy &AsyncInfo, DeviceTy &Device,
+                        MappingInfoTy::HDTTMapAccessorTy &HDTTMap,
+                        KernelArgsMappingInfoTy *KI = nullptr) {
+  // clang-format off
+  ODBG(ADT_Interface)
+      << "targetDataBegin "
+      << "ArgName=" << getNameFromMapping(ArgName) << ", "
+      << "ArgBasePtr=" << ArgBasePtr << ", "
+      << "ArgPtr=" << ArgPtr << ", "
+      << "ArgSize=" << ArgSize << ", "
+      << "ArgType=" << mapTypeToString(ArgType)
+      << " (" << llvm::format_hex(ArgType, 0) << "), "
+      << "ArgDesc=" << ArgDesc;
+  // clang-format on
+
+  // OpenACC 3.4: `if_present` is only valid on `host_data` and `update`
+  // directives.
+  assert(!(ArgType & TGT_ACC_MAPTYPE_IF_PRESENT));
+  // A base pointer must be given exactly when PTR_AND_OBJ is set.
+  assert(!!ArgBasePtr == !!(ArgType & TGT_ACC_MAPTYPE_PTR_AND_OBJ));
+
+  bool IsNoCreate = ArgType & TGT_ACC_MAPTYPE_NO_CREATE;
+  // Appends a kernel argument (only if KI is set): the device address when
+  // the data is present, otherwise the host address, which is only expected
+  // for no_create.
+  auto AddArg = [&](TargetPointerResultTy &TPR, void *TgtArg, void *HstArg) {
+    if (KI) {
+      if (TPR.isPresent()) {
+        KI->addArg(TgtArg);
+      } else {
+        assert(IsNoCreate);
+        KI->addArg(HstArg);
+      }
+    }
+  };
+
+  if (ArgType & TGT_ACC_MAPTYPE_DEVPTR) {
+    if (!KI) {
+      ODBG(ADT_Interface) << "DEVPTR arg in non-kernel context - ignoring.";
+      return;
+    }
+    if (!ArgDesc) {
+      // ArgPtr points at the device pointer; load it and pass it as-is.
+      void *LiteralValue = *reinterpret_cast<void ***>(ArgPtr);
+      ODBG(ADT_Interface) << "Got literal device pointer: " << LiteralValue;
+      KI->addArg(LiteralValue);
+      return;
+    }
+    // DEVPTR with a descriptor. The kernel is compiled to receive a device-side
+    // descriptor as a pointer.
+    ODBG(ADT_Interface) << "DEVPTR with descriptor";
+    ArgDescriptorsTy Descs = parseAndVerifyArgDescs(Loc, ArgDesc);
+    Descs.dataBeginDevPtr(Loc, Device, AsyncInfo, HDTTMap, *KI);
+    return;
+  }
+
+  const bool HasFlagTo = ArgType & TGT_ACC_MAPTYPE_TO;
+  if (ArgType & TGT_ACC_MAPTYPE_PRIVATE) {
+    assert(KI && "Private arg should only appear on kernels");
+
+    int64_t BaseAllocSize = ArgSize;
+
+    if (BaseAllocSize <= 0)
+      REPORT_FATAL() << "Invalid private variable size";
+
+    // Number of private instances: the product of the gang, worker and vector
+    // counts for each privatization level requested in ArgType.
+    int64_t NumPrivate = 1;
+    if ((ArgType & TGT_ACC_MAPTYPE_GANG_PRIVATE))
+      NumPrivate *= KI->KernelArgs.NumGangs[0] * KI->KernelArgs.NumGangs[1] *
+                    KI->KernelArgs.NumGangs[2];
+    if ((ArgType & TGT_ACC_MAPTYPE_WORKER_PRIVATE))
+      NumPrivate *= KI->KernelArgs.NumWorkers;
+    if ((ArgType & TGT_ACC_MAPTYPE_VECTOR_PRIVATE))
+      NumPrivate *= KI->KernelArgs.VectorLength;
+
+    if (ArgDesc) {
+      if (NumPrivate != 1)
+        REPORT_FATAL() << Loc << " Multi-dim private array variable is invalid";
+      ODBG() << "Arg desc on private variable";
+      ArgDescriptorsTy Descs = parseAndVerifyArgDescs(Loc, ArgDesc);
+      Descs.dataBeginPrivate(Loc, ArgPtr, ArgSize, HasFlagTo, Device, AsyncInfo,
+                             HDTTMap, *KI);
+    } else {
+      void *PrivateMemory = Device.allocData(BaseAllocSize * NumPrivate,
+                                             nullptr, TARGET_ALLOC_DEFAULT);
+      ODBG(ADT_Interface) << "Allocated private memory with size "
+                          << BaseAllocSize << " (" << NumPrivate
+                          << " instances) at " << PrivateMemory;
+
+      if (HasFlagTo) {
+        // Copying the host value in is only supported for a single instance.
+        if (NumPrivate != 1)
+          REPORT_FATAL() << Loc
+                         << " Multi-dim private variable with copy is invalid";
+
+        assert(ArgPtr);
+        Device.submitData(PrivateMemory, ArgPtr, ArgSize, AsyncInfo,
+                          /*Entry=*/nullptr, &HDTTMap);
+      }
+      KI->addArg(PrivateMemory);
+      KI->addLaunchAlloc(PrivateMemory);
+    }
+
+    return;
+  }
+
+  if (ArgType & TGT_ACC_MAPTYPE_LITERAL) {
+    assert(KI && "Literal arg should only appear on kernels");
+    assert(ArgSize && "We need size information to pass in literal args");
+    assert(!ArgDesc);
+    // Our codegen uses indirection for literal args.
+    if (ArgSize <= (int)sizeof(void *)) {
+      // If it is possible to type pun to pointer (i.e. the type width is no
+      // bigger than a pointer, then pass it in literally.
+      // NOTE(review): this always loads a full pointer-sized value from
+      // ArgPtr, even when ArgSize is smaller - confirm the storage behind
+      // ArgPtr is always at least pointer-sized.
+      void *LiteralValue = *reinterpret_cast<void ***>(ArgPtr);
+      KI->addArg(LiteralValue);
+      return;
+    } else {
+      REPORT_FATAL() << "TODO need to move memory to device";
+      // KI->addArg(DeviceArgPtr);
+      return;
+    }
+  }
+
+  assert(ArgPtr && "We need to have a pointer for data mapping");
+
+  // Decide which host address acts as the descriptor/parent: the base pointer
+  // when a parent object was given and is present on the device, otherwise
+  // the argument itself.
+  void *ParentAllocation = nullptr;
+  void *DescriptorAddr;
+  bool IsPtrAndObj = ArgType & TGT_ACC_MAPTYPE_PTR_AND_OBJ;
+  if (IsPtrAndObj) {
+    ODBG() << "We got a parent object.";
+    assert(ArgBasePtr);
+    if (Device.getMappingInfo().getTgtPtrBegin(HDTTMap, ArgBasePtr, 1)) {
+      ParentAllocation = ArgBasePtr;
+      DescriptorAddr = ArgBasePtr;
+    } else {
+      // PTR_AND_OBJ but parent not present on device (e.g. enter data copyin
+      // of a pointer component without its parent struct). Data is already
+      // mapped standalone; skip descriptor attach.
+      ODBG() << "Parent not present on device - mapping standalone.";
+      IsPtrAndObj = false;
+      DescriptorAddr = ArgPtr;
+    }
+  } else {
+    DescriptorAddr = ArgPtr;
+  }
+
+  if (ArgSize > 0) {
+    ODBG() << "We got size from the compiler - no descriptor parsing needed.";
+
+    if (IsPtrAndObj) {
+      // Map the pointee data, then release entry lock before looking up
+      // the parent for pointer attachment.
+      void *MemTgtPtr = nullptr;
+      {
+        TargetPointerResultTy TPR = Device.getMappingInfo().getTargetPointer(
+            HDTTMap, ArgPtr, ArgPtr, 0, ArgSize, ArgName, HasFlagTo,
+            /*HasFlagAlways=*/false, /*IsImplicit=*/false,
+            /*UpdateRefCount=*/true, /*HasCloseModifier=*/false,
+            /*HasPresentModifier=*/false,
+            /*HasHoldModifier=*/MapType == AccRefCountingType::Structured,
+            IsNoCreate, AsyncInfo, /*OwnedTPR=*/nullptr,
+            /*ReleaseHDTTMap=*/false);
+        AddArg(TPR, TPR.TargetPointer, ArgPtr);
+        if (TPR.isPresent())
+          MemTgtPtr = TPR.TargetPointer;
+        else
+          assert(IsNoCreate);
+      }
+
+      // Update the parent's pointer field on device.
+      if (MemTgtPtr) {
+        LookupResult ParentLR = Device.getMappingInfo().lookupMapping(
+            HDTTMap, DescriptorAddr, sizeof(void *));
+        if (ParentLR.TPR.getEntry()) {
+          // Host and device address of the pointer field inside the parent.
+          void **HstPtrAddr = reinterpret_cast<void **>(DescriptorAddr);
+          uintptr_t TgtDescAddr = ParentLR.TPR.getEntry()->TgtPtrBegin +
+                                  (reinterpret_cast<uintptr_t>(DescriptorAddr) -
+                                   ParentLR.TPR.getEntry()->HstPtrBegin);
+          void **TgtPtrAddr = reinterpret_cast<void **>(TgtDescAddr);
+
+          // Device address that corresponds to the host pointer value: ArgPtr
+          // may lie at a distance from the address stored in the host field,
+          // so apply the same distance on the device side.
+          void *HstPteeBase = *HstPtrAddr;
+          void *TgtPteeBase = reinterpret_cast<void *>(
+              reinterpret_cast<uintptr_t>(MemTgtPtr) -
+              (reinterpret_cast<uintptr_t>(ArgPtr) -
+               reinterpret_cast<uintptr_t>(HstPteeBase)));
+
+          // Only write the device field when addShadowPointer() reports that
+          // the shadow pointer was added.
+          if (ParentLR.TPR.getEntry()->addShadowPointer(
+                  ShadowPtrInfoTy{HstPtrAddr, TgtPtrAddr, TgtPteeBase,
+                                  static_cast<int64_t>(sizeof(void *))})) {
+            ODBG() << "PTR_AND_OBJ attach: device field " << TgtPtrAddr
+                   << " -> " << TgtPteeBase;
+            // The value to copy is stored in a location provided by AsyncInfo
+            // rather than in a local variable.
+            void *&Buf = AsyncInfo.getVoidPtrLocation();
+            Buf = TgtPteeBase;
+            Device.submitData(TgtPtrAddr, &Buf, sizeof(void *), AsyncInfo,
+                              ParentLR.TPR.getEntry(), &HDTTMap);
+            ParentLR.TPR.getEntry()->addEventIfNecessary(Device, AsyncInfo);
+          }
+        }
+      }
+
+    } else {
+      // Plain mapping of [ArgPtr, ArgPtr + ArgSize).
+      TargetPointerResultTy TPR = Device.getMappingInfo().getTargetPointer(
+          HDTTMap, ArgPtr, ArgPtr, 0, ArgSize, ArgName, HasFlagTo,
+          /*HasFlagAlways=*/false, /*IsImplicit=*/false,
+          /*UpdateRefCount=*/true, /*HasCloseModifier=*/false,
+          /*HasPresentModifier=*/false,
+          /*HasHoldModifier=*/MapType == AccRefCountingType::Structured,
+          IsNoCreate, AsyncInfo, /*OwnedTPR=*/nullptr,
+          /*ReleaseHDTTMap=*/false);
+      AddArg(TPR, TPR.TargetPointer, ArgPtr);
+    }
+    return;
+  }
+
+  // No size from the compiler: derive the mapping from the descriptor(s).
+  ArgDescriptorsTy Descs = parseAndVerifyArgDescs(Loc, ArgDesc);
+  Descs.dataBegin(Loc, ArgPtr, DescriptorAddr, ParentAllocation, IsPtrAndObj,
+                  ArgName, HasFlagTo, IsNoCreate, MapType, AsyncInfo, Device,
+                  HDTTMap, KI);
+}
+
+/// Reports whether the host address \p Ptr currently has a device mapping on
+/// \p Device. The lookup probes one byte and is made with reference count
+/// updates disabled.
+bool isPresent(DeviceTy &Device, void *Ptr) {
+  return Device.getMappingInfo()
+      .getTgtPtrBegin(Ptr, 1, /*UpdateRefCount=*/false,
+                      /*UseHoldRefCount=*/false)
+      .isPresent();
+}
+
+/// Ends the mapping of one argument; the counterpart of accTargetDataBegin.
+///
+/// Arguments that only matter for kernel launches (the private variants,
+/// literals and device pointers) are ignored. For a PTR_AND_OBJ argument the
+/// parent \p ArgBasePtr is used as the descriptor address only while it is
+/// present on the device; otherwise the argument is handled as a standalone
+/// mapping. A positive \p ArgSize is released through handleSingleDataEnd,
+/// any other size goes through the parsed descriptor \p ArgDesc.
+///
+/// \param ArgType bitmask of TGT_ACC_MAPTYPE_* flags. FINALIZE is forwarded
+///        as ForceDelete, NO_CREATE as IsNoCreate, and FROM selects
+///        AccCopyOutType::OnDelete instead of AccCopyOutType::Never.
+/// \param ArgName only used for the debug trace.
+/// \param MapType reference counting kind, forwarded unchanged.
+void accTargetDataEnd(ident_t *Loc, void *ArgBasePtr, void *ArgPtr,
+                      int64_t ArgSize, int64_t ArgType, char *ArgName,
+                      AccDataDesc *ArgDesc, AccRefCountingType MapType,
+                      AsyncInfoTy &AsyncInfo, DeviceTy &Device) {
+  // clang-format off
+  ODBG(ADT_Interface)
+      << "targetDataEnd "
+      << "ArgName=" << getNameFromMapping(ArgName) << ", "
+      << "ArgBasePtr=" << ArgBasePtr << ", "
+      << "ArgPtr=" << ArgPtr << ", "
+      << "ArgSize=" << ArgSize << ", "
+      << "ArgType=" << mapTypeToString(ArgType)
+      << " (" << llvm::format_hex(ArgType, 0) << "), "
+      << "ArgDesc=" << ArgDesc;
+  // clang-format on
+
+  // OpenACC 3.4: `if_present` is only valid on `host_data` and `update`
+  // directives.
+  assert(!(ArgType & TGT_ACC_MAPTYPE_IF_PRESENT));
+
+  // A base pointer must be supplied exactly when PTR_AND_OBJ is requested.
+  assert(!!ArgBasePtr == !!(ArgType & TGT_ACC_MAPTYPE_PTR_AND_OBJ));
+
+  // These types are only for kernel launches
+  if ((ArgType & TGT_ACC_MAPTYPE_VECTOR_PRIVATE) ||
+      (ArgType & TGT_ACC_MAPTYPE_GANG_PRIVATE) ||
+      (ArgType & TGT_ACC_MAPTYPE_WORKER_PRIVATE) ||
+      (ArgType & TGT_ACC_MAPTYPE_LITERAL) ||
+      (ArgType & TGT_ACC_MAPTYPE_DEVPTR) ||
+      (ArgType & TGT_ACC_MAPTYPE_PRIVATE)) {
+    ODBG(ADT_Interface) << "Kernel launch argument - ignoring.";
+    return;
+  }
+
+  assert(ArgPtr && "We need to have a pointer for data mapping");
+
+  void *ParentAllocation = nullptr;
+  // Assigned on every path of the branch below before it is read.
+  void *DescriptorAddr;
+  bool IsPtrAndObj = ArgType & TGT_ACC_MAPTYPE_PTR_AND_OBJ;
+  if (IsPtrAndObj) {
+    ODBG(ADT_Mapping) << "We got a parent object.";
+    assert(ArgBasePtr);
+    if (isPresent(Device, ArgBasePtr)) {
+      ParentAllocation = ArgBasePtr;
+      DescriptorAddr = ArgBasePtr;
+    } else {
+      // PTR_AND_OBJ but parent not present on device (e.g. enter data copyin
+      // of a pointer component without its parent struct). Data is already
+      // mapped standalone; skip descriptor attach.
+      ODBG(ADT_Mapping) << "Parent not present on device - mapping standalone.";
+      // NOTE(review): IsPtrAndObj is not read again in this function after
+      // this assignment.
+      IsPtrAndObj = false;
+      DescriptorAddr = ArgPtr;
+    }
+  } else {
+    DescriptorAddr = ArgPtr;
+  }
+
+  const bool ForceDelete = ArgType & TGT_ACC_MAPTYPE_FINALIZE;
+  const bool HasFlagFrom = ArgType & TGT_ACC_MAPTYPE_FROM;
+  const bool IsNoCreate = ArgType & TGT_ACC_MAPTYPE_NO_CREATE;
+  // Copy back to the host only when the FROM flag is set.
+  AccCopyOutType CopyType = AccCopyOutType::Never;
+  if (HasFlagFrom)
+    CopyType = AccCopyOutType::OnDelete;
+  if (ArgSize > 0) {
+    ODBG(ADT_Mapping)
+        << "We got size from the compiler - no descriptor parsing needed.";
+
+    handleSingleDataEnd<int64_t>(Loc, DescriptorAddr, ArgPtr, ArgSize,
+                                 ForceDelete, IsNoCreate, CopyType, MapType,
+                                 AsyncInfo, Device);
+    return;
+  }
+
+  // No usable size from the compiler: fall back to the type descriptor.
+  ArgDescriptorsTy Descs = parseAndVerifyArgDescs(Loc, ArgDesc);
+  Descs.dataEnd(Loc, ArgPtr, DescriptorAddr, ParentAllocation, ArgType,
+                ForceDelete, IsNoCreate, CopyType, MapType, AsyncInfo, Device);
+}
+
+/// Performs the data movement of an `update` for one argument.
+///
+/// LITERAL, PRIVATE and DEVPTR arguments as well as null \p ArgPtr values are
+/// skipped. With a positive \p ArgSize the existing mapping is looked up
+/// (it must contain the whole range, reference counts are not updated) and
+/// data is submitted to the device when the TO flag is set and retrieved from
+/// it when the FROM flag is set; both transfers happen if both flags are set.
+/// A missing mapping is skipped silently when IF_PRESENT is set and reported
+/// through REPORT_FATAL otherwise. Any other size is handled through the
+/// parsed descriptor \p ArgDesc.
+///
+/// \param ArgBasePtr not used by this function.
+/// \param ArgName only used for the debug trace.
+void accTargetDataUpdate(ident_t *Loc, void *ArgBasePtr, void *ArgPtr,
+                         int64_t ArgSize, int64_t ArgType, char *ArgName,
+                         AccDataDesc *ArgDesc, AsyncInfoTy &AsyncInfo,
+                         DeviceTy &Device) {
+  // clang-format off
+  ODBG(ADT_Interface)
+      << "update "
+      << "ArgName=" << getNameFromMapping(ArgName) << ", "
+      << "ArgPtr=" << ArgPtr << ", "
+      << "ArgSize=" << ArgSize << ", "
+      << "ArgType=" << mapTypeToString(ArgType)
+      << " (" << llvm::format_hex(ArgType, 0) << "), "
+      << "ArgDesc=" << ArgDesc;
+  // clang-format on
+
+  // Nothing to transfer for these argument kinds.
+  if ((ArgType & TGT_ACC_MAPTYPE_LITERAL) ||
+      (ArgType & TGT_ACC_MAPTYPE_PRIVATE) || (ArgType & TGT_ACC_MAPTYPE_DEVPTR))
+    return;
+
+  if (!ArgPtr)
+    return;
+
+  if (ArgSize > 0) {
+    // Pure lookup: the update must not change any reference count.
+    TargetPointerResultTy TPR = Device.getMappingInfo().getTgtPtrBegin(
+        ArgPtr, ArgSize, /*UpdateRefCount=*/false, /*UseHoldRefCount=*/false,
+        /*MustContain=*/true);
+    if (!TPR.isPresent()) {
+      if (ArgType & TGT_ACC_MAPTYPE_IF_PRESENT) {
+        ODBG(ADT_Interface) << "Not present, if_present - skipping update.";
+        return;
+      }
+      REPORT_FATAL() << "Device mapping does not exist for update at " << Loc;
+    }
+    void *TgtPtr = TPR.TargetPointer;
+    if (ArgType & TGT_ACC_MAPTYPE_TO) {
+      ODBG(ADT_Interface) << "Update TO: " << ArgSize << " bytes hst:" << ArgPtr
+                          << " -> tgt:" << TgtPtr;
+      Device.submitData(TgtPtr, ArgPtr, ArgSize, AsyncInfo, TPR.getEntry());
+    }
+    if (ArgType & TGT_ACC_MAPTYPE_FROM) {
+      ODBG(ADT_Interface) << "Update FROM: " << ArgSize
+                          << " bytes tgt:" << TgtPtr << " -> hst:" << ArgPtr;
+      Device.retrieveData(ArgPtr, TgtPtr, ArgSize, AsyncInfo, TPR.getEntry());
+    }
+    return;
+  }
+
+  // No usable size from the compiler: fall back to the type descriptor.
+  ArgDescriptorsTy Descs = parseAndVerifyArgDescs(Loc, ArgDesc);
+  Descs.dataUpdate(Loc, ArgPtr, ArgType, AsyncInfo, Device);
+}
+
+/// Resolves the device for \p DeviceType and invokes
+/// `Callback(Device, AsyncInfo)` on it.
+///
+/// For `AccAsyncSync` the callback receives an AsyncInfoTy that is local to
+/// this call. For every other \p Async value it receives the AsyncInfoTy
+/// obtained from a QueueAsyncInfoWrapperTy built from the device and the async
+/// value. Failure to obtain the device is reported through REPORT_FATAL.
+template <typename T>
+void withDeviceAndQueue(int64_t DeviceType, int64_t Async, T Callback) {
+  auto RequestedType = static_cast<acc_device_t>(DeviceType);
+  llvm::Expected<DeviceTy &> MaybeDevice = DM->getDevice(RequestedType);
+  if (!MaybeDevice)
+    REPORT_FATAL() << "Failed to get device: "
+                   << toString(MaybeDevice.takeError());
+
+  DeviceTy &Device = *MaybeDevice;
+
+  ODBG(ADT_Interface) << "with device type " << DeviceType << " and async "
+                      << asyncToString(Async);
+
+  if (Async != AccAsyncSync) {
+    // Asynchronous request: run the callback on the selected queue.
+    QueueAsyncInfoWrapperTy QueueInfo(Device, Async);
+    AsyncInfoTy &QueueAsyncInfo = QueueInfo;
+    Callback(Device, QueueAsyncInfo);
+    return;
+  }
+
+  // Synchronous request: the async info lives only for this call.
+  AsyncInfoTy LocalAsyncInfo(Device);
+  Callback(Device, LocalAsyncInfo);
+}
+
+/// Calls \p Func once per argument, passing the per-argument entries of the
+/// parallel arrays followed by \p Args.
+///
+/// \param Increasing visit the arguments from index 0 upwards when true and
+///        from index `ArgNum - 1` downwards when false.
+/// \param ArgNames may be null, in which case a null name is passed for
+///        every argument. The other arrays are indexed unconditionally.
+/// \param ArgMappers must be null; mappers are not generated.
+/// \param Args extra trailing arguments. They are handed to every call as
+///        lvalues (not forwarded) because they are reused for each argument.
+template <typename FuncTy, typename... ArgsTy>
+void forEachArg(FuncTy Func, bool Increasing, ident_t *Loc, uint32_t ArgNum,
+                void **ArgBasePtrs, void **ArgPtrs, int64_t *ArgSizes,
+                int64_t *ArgTypes, char **ArgNames, void **ArgMappers,
+                AccDataDesc **ArgDescs, ArgsTy &&...Args) {
+  assert(!ArgMappers && "we currently do not generate mappers");
+  ODBG(ADT_Interface) << "Got " << ArgNum << " args at " << Loc;
+  // Count the steps with an unsigned counter and derive the index from it.
+  // This avoids mixing signed and unsigned values for the loop bounds, which
+  // depended on implementation-defined narrowing and on ArgNum fitting into
+  // an int32_t.
+  for (uint32_t Step = 0; Step < ArgNum; ++Step) {
+    const uint32_t I = Increasing ? Step : ArgNum - 1 - Step;
+    ODBG(ADT_Interface) << "Handling arg #" << I;
+    char *Name = ArgNames ? ArgNames[I] : nullptr;
+    Func(Loc, ArgBasePtrs[I], ArgPtrs[I], ArgSizes[I], ArgTypes[I], Name,
+         ArgDescs[I], Args...);
+  }
+}
+} // namespace
+
+namespace llvm::acc::target {
+/// Maps a single argument on the default device type with dynamic reference
+/// counting and returns the one argument value that accTargetDataBegin
+/// recorded for it.
+void *accDataEnter(void *ArgBasePtr, void *ArgPtr, int64_t ArgSize,
+                   int64_t ArgType, int64_t Async) {
+  void *DevicePtr = nullptr;
+  auto EnterOne = [&](DeviceTy &Device, AsyncInfoTy &AsyncInfo) {
+    {
+      // Inner scope: the exclusive accessor is gone before the dump below.
+      AccKernelArgsTy EmptyKernelArgs = {};
+      KernelArgsMappingInfoTy Collected{EmptyKernelArgs, {}, {}, {}};
+      MappingInfoTy::HDTTMapAccessorTy HDTTMap =
+          Device.getMappingInfo().HostDataToTargetMap.getExclusiveAccessor();
+      accTargetDataBegin(nullptr, ArgBasePtr, ArgPtr, ArgSize, ArgType, nullptr,
+                         nullptr, AccRefCountingType::Dynamic, AsyncInfo,
+                         Device, HDTTMap, &Collected);
+      assert(Collected.Args.size() == 1);
+      DevicePtr = Collected.Args[0];
+    }
+    dumpTargetPointerMappings(nullptr, Device);
+  };
+  withDeviceAndQueue(acc_device_default, Async, EnterOne);
+  return DevicePtr;
+}
+} // namespace llvm::acc::target
+
+/// 'acc declare' entry point: maps every argument, in order, with structured
+/// reference counting. \p Desc is expected to be null; \p Flags is unused.
+EXTERN void __tgt_acc_declare(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+                              uint32_t ArgNum, void **ArgBasePtrs,
+                              void **ArgPtrs, int64_t *ArgSizes,
+                              int64_t *ArgTypes, char **ArgNames,
+                              void **ArgMappers, AccDataDesc **ArgDescs,
+                              int64_t Async, __tgt_bin_desc *Desc) {
+  FUNC_LOGGER(Loc);
+  assert(!Desc);
+
+  auto DeclareAll = [&](DeviceTy &Device, AsyncInfoTy &AsyncInfo) {
+    {
+      // Inner scope: the exclusive accessor is gone before the dump below.
+      MappingInfoTy::HDTTMapAccessorTy HDTTMap =
+          Device.getMappingInfo().HostDataToTargetMap.getExclusiveAccessor();
+      forEachArg(accTargetDataBegin, /*Increasing=*/true, Loc, ArgNum,
+                 ArgBasePtrs, ArgPtrs, ArgSizes, ArgTypes, ArgNames, ArgMappers,
+                 ArgDescs, AccRefCountingType::Structured, AsyncInfo, Device,
+                 HDTTMap, /*KI=*/nullptr);
+    }
+    dumpTargetPointerMappings(Loc, Device);
+  };
+  withDeviceAndQueue(DeviceType, Async, DeclareAll);
+}
+
+/// 'acc update' entry point: runs accTargetDataUpdate on every argument, in
+/// order, on the device and queue selected by \p DeviceType and \p Async.
+/// \p Flags is unused.
+EXTERN void __tgt_acc_data_update(ident_t *Loc, int64_t Flags,
+                                  int64_t DeviceType, uint32_t ArgNum,
+                                  void **ArgBasePtrs, void **ArgPtrs,
+                                  int64_t *ArgSizes, int64_t *ArgTypes,
+                                  char **ArgNames, void **ArgMappers,
+                                  AccDataDesc **ArgDescs, int64_t Async) {
+  FUNC_LOGGER(Loc);
+  auto UpdateAll = [&](DeviceTy &Device, AsyncInfoTy &AsyncInfo) {
+    forEachArg(accTargetDataUpdate, /*Increasing=*/true, Loc, ArgNum,
+               ArgBasePtrs, ArgPtrs, ArgSizes, ArgTypes, ArgNames, ArgMappers,
+               ArgDescs, AsyncInfo, Device);
+  };
+  withDeviceAndQueue(DeviceType, Async, UpdateAll);
+}
+
+/// 'acc enter data' entry point: maps every argument, in order, with dynamic
+/// reference counting. \p Flags is unused.
+EXTERN void __tgt_acc_data_enter(ident_t *Loc, int64_t Flags,
+                                 int64_t DeviceType, uint32_t ArgNum,
+                                 void **ArgBasePtrs, void **ArgPtrs,
+                                 int64_t *ArgSizes, int64_t *ArgTypes,
+                                 char **ArgNames, void **ArgMappers,
+                                 AccDataDesc **ArgDescs, int64_t Async) {
+  FUNC_LOGGER(Loc);
+  auto EnterAll = [&](DeviceTy &Device, AsyncInfoTy &AsyncInfo) {
+    {
+      // Inner scope: the exclusive accessor is gone before the dump below.
+      MappingInfoTy::HDTTMapAccessorTy HDTTMap =
+          Device.getMappingInfo().HostDataToTargetMap.getExclusiveAccessor();
+      forEachArg(accTargetDataBegin, /*Increasing=*/true, Loc, ArgNum,
+                 ArgBasePtrs, ArgPtrs, ArgSizes, ArgTypes, ArgNames, ArgMappers,
+                 ArgDescs, AccRefCountingType::Dynamic, AsyncInfo, Device,
+                 HDTTMap, /*KI=*/nullptr);
+    }
+    dumpTargetPointerMappings(Loc, Device);
+  };
+  withDeviceAndQueue(DeviceType, Async, EnterAll);
+}
+
+/// 'acc exit data' entry point: ends the mapping of every argument with
+/// dynamic reference counting, visiting the arguments in reverse order.
+/// \p Flags is unused.
+EXTERN void __tgt_acc_data_exit(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+                                uint32_t ArgNum, void **ArgBasePtrs,
+                                void **ArgPtrs, int64_t *ArgSizes,
+                                int64_t *ArgTypes, char **ArgNames,
+                                void **ArgMappers, AccDataDesc **ArgDescs,
+                                int64_t Async) {
+  FUNC_LOGGER(Loc);
+  auto ExitAll = [&](DeviceTy &Device, AsyncInfoTy &AsyncInfo) {
+    forEachArg(accTargetDataEnd, /*Increasing=*/false, Loc, ArgNum, ArgBasePtrs,
+               ArgPtrs, ArgSizes, ArgTypes, ArgNames, ArgMappers, ArgDescs,
+               AccRefCountingType::Dynamic, AsyncInfo, Device);
+    dumpTargetPointerMappings(Loc, Device);
+  };
+  withDeviceAndQueue(DeviceType, Async, ExitAll);
+}
+
+/// Start of a structured data region: maps every argument, in order, with
+/// structured reference counting. \p Flags is unused.
+EXTERN void __tgt_acc_data_begin(ident_t *Loc, int64_t Flags,
+                                 int64_t DeviceType, uint32_t ArgNum,
+                                 void **ArgBasePtrs, void **ArgPtrs,
+                                 int64_t *ArgSizes, int64_t *ArgTypes,
+                                 char **ArgNames, void **ArgMappers,
+                                 AccDataDesc **ArgDescs, int64_t Async) {
+  FUNC_LOGGER(Loc);
+  auto BeginAll = [&](DeviceTy &Device, AsyncInfoTy &AsyncInfo) {
+    {
+      // Inner scope: the exclusive accessor is gone before the dump below.
+      MappingInfoTy::HDTTMapAccessorTy HDTTMap =
+          Device.getMappingInfo().HostDataToTargetMap.getExclusiveAccessor();
+      forEachArg(accTargetDataBegin, /*Increasing=*/true, Loc, ArgNum,
+                 ArgBasePtrs, ArgPtrs, ArgSizes, ArgTypes, ArgNames, ArgMappers,
+                 ArgDescs, AccRefCountingType::Structured, AsyncInfo, Device,
+                 HDTTMap, /*KI=*/nullptr);
+    }
+    dumpTargetPointerMappings(Loc, Device);
+  };
+  withDeviceAndQueue(DeviceType, Async, BeginAll);
+}
+
+/// End of a structured data region: ends the mapping of every argument with
+/// structured reference counting, visiting the arguments in reverse order.
+/// \p Flags is unused.
+EXTERN void __tgt_acc_data_end(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+                               uint32_t ArgNum, void **ArgBasePtrs,
+                               void **ArgPtrs, int64_t *ArgSizes,
+                               int64_t *ArgTypes, char **ArgNames,
+                               void **ArgMappers, AccDataDesc **ArgDescs,
+                               int64_t Async) {
+  FUNC_LOGGER(Loc);
+  auto EndAll = [&](DeviceTy &Device, AsyncInfoTy &AsyncInfo) {
+    forEachArg(accTargetDataEnd, /*Increasing=*/false, Loc, ArgNum, ArgBasePtrs,
+               ArgPtrs, ArgSizes, ArgTypes, ArgNames, ArgMappers, ArgDescs,
+               AccRefCountingType::Structured, AsyncInfo, Device);
+    dumpTargetPointerMappings(Loc, Device);
+  };
+  withDeviceAndQueue(DeviceType, Async, EndAll);
+}
+
+/// Translates the host-side kernel handle \p HostPtr into the address of the
+/// matching entry in the target table of \p Device.
+///
+/// A \p HostPtr without a table map is reported through REPORT_FATAL instead
+/// of being dereferenced.
+///
+/// \returns the `Address` field of the target table entry selected by the
+///          table map index.
+void *getDeviceEntryPtr(void *HostPtr, DeviceTy &Device) {
+  int32_t DeviceId = Device.DeviceID;
+  TableMap *TM = llvm::offload::getTableMap(HostPtr);
+  // NOTE(review): getTableMap is expected to return null for a host pointer
+  // that was never registered (as the libomptarget lookup of the same name
+  // does); diagnose that case rather than crash on the dereference below.
+  if (!TM)
+    REPORT_FATAL() << "Host ptr " << HostPtr
+                   << " does not have a matching target pointer.";
+
+  __tgt_target_table *TargetTable = nullptr;
+  {
+    // The translation table is only read while holding its mutex.
+    std::lock_guard<std::mutex> TrlTblLock(PM->TrlTblMtx);
+    assert(TM->Table->TargetsTable.size() > (size_t)DeviceId &&
+           "Not expecting a device ID outside the table's bounds!");
+    TargetTable = TM->Table->TargetsTable[DeviceId];
+  }
+  assert(TargetTable && "Global data has not been mapped\n");
+
+  void *TgtEntryPtr = TargetTable->EntriesBegin[TM->Index].Address;
+  ODBG(ADT_Kernel) << "Launching target execution "
+                   << TargetTable->EntriesBegin[TM->Index].SymbolName
+                   << " with pointer " << TgtEntryPtr << " (index=" << TM->Index
+                   << ").";
+  return TgtEntryPtr;
+}
+
+/// Compute construct entry point: maps the kernel arguments, launches the
+/// kernel and ends the argument mappings again.
+///
+/// The sequence is:
+///  1. map every argument (structured reference counting) while holding the
+///     exclusive host-to-target map accessor, collecting launch arguments in
+///     a KernelArgsMappingInfoTy;
+///  2. release the accessor and build the KernelArgsTy for the device;
+///  3. look up the device entry for \p Kernel and launch it (a failed launch
+///     is reported through REPORT_FATAL);
+///  4. end the mappings in reverse argument order;
+///  5. register a post-processing function on the async info that deletes
+///     the launch allocations collected in step 1.
+///
+/// \param Desc expected to be null.
+/// \param KernelName only used for the debug trace.
+/// \returns always OFFLOAD_SUCCESS.
+EXTERN int __tgt_acc_kernel(ident_t *Loc, void *Kernel, int64_t Flags,
+                            int64_t DeviceType, AccKernelArgsTy *Args,
+                            int64_t Async, const char *KernelName,
+                            __tgt_bin_desc *Desc) {
+  FUNC_LOGGER(Loc);
+  assert(!Desc);
+
+  withDeviceAndQueue(
+      DeviceType, Async, [&](DeviceTy &Device, AsyncInfoTy &AsyncInfo) {
+        MappingInfoTy::HDTTMapAccessorTy HDTTMap =
+            Device.getMappingInfo().HostDataToTargetMap.getExclusiveAccessor();
+
+        // Nothing in this function adds elements to these two vectors; they
+        // are handed to launchKernel as they are.
+        SmallVector<void *> TgtArgs;
+        SmallVector<ptrdiff_t> TgtOffsets;
+        KernelArgsMappingInfoTy KI{*Args, {}, {}, {}};
+        forEachArg(accTargetDataBegin, /*Increasing=*/true, Loc, Args->ArgNum,
+                   Args->ArgBasePtrs, Args->ArgPtrs, Args->ArgSizes,
+                   Args->ArgTypes, Args->ArgNames, Args->ArgMappers,
+                   Args->ArgDescs, AccRefCountingType::Structured, AsyncInfo,
+                   Device, HDTTMap, &KI);
+        // Release the accessor explicitly before the launch and before
+        // accTargetDataEnd below.
+        HDTTMap.destroy();
+
+        KernelLaunchParamsTy LaunchParams = KI.getLaunchArgs();
+        KernelArgsTy DeviceArgs = {0};
+        DeviceArgs.Version = 4;
+        // The launch parameters are passed through the ArgPtrs field together
+        // with the IsCUDA flag.
+        DeviceArgs.ArgPtrs = reinterpret_cast<void **>(&LaunchParams);
+        DeviceArgs.Flags.IsCUDA = true;
+        DeviceArgs.DynCGroupMem = Args->SmemSize;
+
+        // Gangs become the block counts; vector length and worker count
+        // become the first two thread limits.
+        DeviceArgs.UserNumBlocks[0] = Args->NumGangs[0];
+        DeviceArgs.UserNumBlocks[1] = Args->NumGangs[1];
+        DeviceArgs.UserNumBlocks[2] = Args->NumGangs[2];
+        DeviceArgs.UserThreadLimit[0] = Args->VectorLength;
+        DeviceArgs.UserThreadLimit[1] = Args->NumWorkers;
+        DeviceArgs.UserThreadLimit[2] = 1;
+
+        void *TgtEntryPtr = getDeviceEntryPtr(Kernel, Device);
+        ODBG(ADT_Interface)
+            << "Launching device kernel " << KernelName
+            << " with entry hst: " << Kernel << " tgt: " << TgtEntryPtr
+            << " with " << KI.Args.size() << " (" << Args->ArgNum << ") args";
+        ODBG(ADT_Interface) << "NumGangs " << Args->NumGangs[0] << ", "
+                            << Args->NumGangs[1] << ", " << Args->NumGangs[2];
+        ODBG(ADT_Interface) << "VectorLength " << Args->VectorLength;
+        ODBG(ADT_Interface) << "NumWorkers " << Args->NumWorkers;
+        ODBG(ADT_Interface) << "SmemSize " << Args->SmemSize;
+
+        // Every collected argument occupies one pointer-sized launch slot.
+        assert(KI.Args.size() * sizeof(void *) == LaunchParams.Size);
+        for (unsigned I = 0; I < KI.Args.size(); I++)
+          ODBG(ADT_Interface) << "Arg #" << I << ": " << KI.Args[I];
+
+        if (Device.launchKernel(TgtEntryPtr, TgtArgs.data(), TgtOffsets.data(),
+                                DeviceArgs, nullptr,
+                                AsyncInfo) != OFFLOAD_SUCCESS)
+          REPORT_FATAL() << "Kernel launch failed";
+
+        // Undo the argument mappings in reverse order.
+        forEachArg(accTargetDataEnd, /*Increasing=*/false, Loc, Args->ArgNum,
+                   Args->ArgBasePtrs, Args->ArgPtrs, Args->ArgSizes,
+                   Args->ArgTypes, Args->ArgNames, Args->ArgMappers,
+                   Args->ArgDescs, AccRefCountingType::Structured, AsyncInfo,
+                   Device);
+
+        dumpTargetPointerMappings(Loc, Device);
+
+        // The list of launch allocations is copied into the closure, so it
+        // does not depend on KI staying alive. The first failing deletion
+        // stops the loop and its status is returned.
+        auto LaunchAllocDeleter = [Device = &Device,
+                                   LaunchAllocs = KI.LaunchAllocs]() {
+          for (void *LaunchAlloc : LaunchAllocs)
+            if (int32_t Ret = Device->deleteData(LaunchAlloc);
+                Ret != OFFLOAD_SUCCESS)
+              return Ret;
+          return OFFLOAD_SUCCESS;
+        };
+        AsyncInfo.addPostProcessingFunction(LaunchAllocDeleter);
+      });
+  return OFFLOAD_SUCCESS;
+}
+
+/// `acc host_data use_device` entry point: looks up the device address that
+/// corresponds to \p HostPtr on the device returned by `DM->getDevice()`.
+///
+/// The lookup holds the exclusive host-to-target map accessor. \p BasePtr
+/// and \p Flags are only written to the debug trace. Failure to obtain the
+/// device is reported through REPORT_FATAL.
+///
+/// \returns the result of the mapping lookup for \p HostPtr (presumably null
+///          when \p HostPtr is not mapped - TODO confirm).
+EXTERN void *__tgt_acc_get_deviceptr(ident_t *Loc, void *BasePtr, int64_t Flags,
+                                     void *HostPtr) {
+  FUNC_LOGGER(Loc);
+  // Separate the source location from the first field so the trace does not
+  // read "file:lineBasePtr: ...".
+  ODBG(ADT_Interface) << Loc << ": BasePtr: " << BasePtr << ", "
+                      << "Flags: " << llvm::format_hex(Flags, 0) << ", "
+                      << "HostPtr: " << HostPtr;
+
+  void *DevicePtr = nullptr;
+
+  llvm::Expected<DeviceTy &> DeviceOrErr = DM->getDevice();
+  if (!DeviceOrErr)
+    REPORT_FATAL() << "Failed to get device: "
+                   << toString(DeviceOrErr.takeError());
+  DeviceTy &Device = *DeviceOrErr;
+
+  MappingInfoTy::HDTTMapAccessorTy HDTTMap =
+      Device.getMappingInfo().HostDataToTargetMap.getExclusiveAccessor();
+  DevicePtr = Device.getMappingInfo().getTgtPtrBegin(HDTTMap, HostPtr, 0);
+
+  ODBG(ADT_Interface) << "DevicePtr: " << DevicePtr;
+
+  return DevicePtr;
+}
+
+/// 'acc set default_async' entry point: validates \p Async and stores it in
+/// `icv::AccDefaultAsyncVar`.
+///
+/// `AccAsyncSync`, `AccAsyncNoval` and any other negative value are rejected
+/// through REPORT_FATAL; `AccAsyncDefault` is stored as
+/// `AccAsyncDefaultQueue`.
+EXTERN void __tgt_acc_set_default_async(ident_t *Loc, int64_t Async) {
+  FUNC_LOGGER(Loc);
+  ODBG(ADT_Interface) << Loc << ": Set async=" << asyncToString(Async);
+
+  // Each diagnostic separates the source location from the text with ": ",
+  // as the trace above does; without it the two run together.
+  if (Async == AccAsyncSync) {
+    REPORT_FATAL() << Loc
+                   << ": The default queue cannot be set to `acc_async_sync'";
+  } else if (Async == AccAsyncNoval) {
+    REPORT_FATAL() << Loc
+                   << ": The default queue cannot be set to `acc_async_noval'";
+  } else if (Async == AccAsyncDefault) {
+    Async = AccAsyncDefaultQueue;
+  } else if (Async < 0) {
+    REPORT_FATAL() << Loc << ": Negative queues are invalid";
+  }
+
+  icv::AccDefaultAsyncVar = Async;
+}
+
+/// 'acc set device_num' entry point. \p Loc is only used for scope logging
+/// and \p Flags is unused.
+EXTERN void __tgt_acc_set_device_num(ident_t *Loc, int64_t Flags,
+                                     int64_t DeviceType, int64_t DeviceNum) {
+  FUNC_LOGGER(Loc);
+  // OpenACC 3.3: a negative device_num makes the runtime revert to its
+  // default behavior, which is implementation-defined. Here that default is
+  // device number 0.
+  const int64_t EffectiveNum = DeviceNum < 0 ? 0 : DeviceNum;
+
+  if (DeviceType != 0) {
+    // An explicit device type was given: select it, then set its number.
+    DM->setDeviceType(static_cast<acc_device_t>(DeviceType));
+    DM->setDeviceId(EffectiveNum);
+    return;
+  }
+
+  // OpenACC 3.3: If the value of the device_type argument is zero or the clause
+  // does not appear, the selected device number will be used for all attached
+  // accelerator types.
+  DM->setAllDeviceId(EffectiveNum);
+}
+
+/// 'acc set device_type' entry point: forwards \p DeviceType, converted to
+/// `acc_device_t`, to the device manager. \p Flags is unused.
+EXTERN void __tgt_acc_set_device_type(ident_t *Loc, int64_t Flags,
+                                      int64_t DeviceType) {
+  FUNC_LOGGER(Loc);
+  const auto RequestedType = static_cast<acc_device_t>(DeviceType);
+  DM->setDeviceType(RequestedType);
+}
+
+/// 'acc wait' entry point: traces its arguments and waits on the queues in
+/// \p WaitList for the device id reported by `DM->getPMDeviceId()`.
+///
+/// \p DeviceType, \p DeviceNum, \p Async and \p Flags are only written to the
+/// debug trace.
+///
+/// \returns always 0.
+EXTERN int __tgt_acc_wait(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+                          int32_t DeviceNum, uint32_t WaitNum,
+                          int64_t *WaitList, int64_t Async) {
+  FUNC_LOGGER(Loc);
+  ODBG(ADT_Interface) << Loc << "\n"
+                      << "DeviceNum: " << DeviceNum << ", "
+                      << "DeviceType: " << DeviceType << ", "
+                      << "WaitNum: " << WaitNum;
+  for (uint32_t Idx = 0; Idx != WaitNum; ++Idx) {
+    ODBG(ADT_Interface) << "WaitList[" << Idx
+                        << "]: " << asyncToString(WaitList[Idx]);
+  }
+  ODBG(ADT_Interface) << "Async: " << asyncToString(Async)
+                      << " Flags: " << llvm::format_hex(Flags, 0);
+
+  const auto PMDeviceId = DM->getPMDeviceId();
+  accAsyncWait(Loc, PMDeviceId, WaitNum, WaitList);
+
+  return 0;
+}
+
+namespace {
+/// Serializes the entry points that initialize or tear down the runtime.
+static std::mutex InitMutex;
+/// Number of initAccRuntime() calls not yet balanced by deinitAccRuntime().
+uint32_t InitRefCount = 0;
+
+/// Takes one reference on the OpenACC runtime.
+///
+/// Every call initializes the offload runtime, registers all RTLs and
+/// refreshes the device mapping. The device manager and the queue manager are
+/// created only when the reference count goes from 0 to 1.
+///
+/// This function does no locking of its own; the callers visible in this file
+/// hold InitMutex around it.
+static void initAccRuntime() {
+  FUNC_LOGGER();
+  initRuntime(/*OffloadEnabled=*/true);
+  // TODO Blindly register all rtls for now. In reality we should only be
+  // initializing the requested types in case we come from __tgt_acc_init(), or
+  // only the ones we have device code for.
+  __tgt_init_all_rtls();
+
+  InitRefCount++;
+  if (InitRefCount == 1) {
+    llvm::acc::target::DM = new llvm::acc::target::DeviceManagerTy();
+    llvm::acc::target::DM->init();
+
+    llvm::acc::target::QueueManager = new llvm::acc::target::QueueManagerTy();
+    llvm::acc::target::QueueManager->init();
+  }
+  llvm::acc::target::DM->refreshDeviceMapping(/*UpdateDeviceType=*/true);
+}
+
+/// Drops one reference on the OpenACC runtime.
+///
+/// When the last reference is dropped the queue manager and then the device
+/// manager are deinitialized and deleted, i.e. in the reverse order of their
+/// creation. Every call also deinitializes the offload runtime once.
+///
+/// NOTE(review): there is no guard against a call with InitRefCount == 0; the
+/// unsigned counter would wrap around. Calls must be balanced with
+/// initAccRuntime().
+static void deinitAccRuntime() {
+  FUNC_LOGGER();
+  if (InitRefCount == 1) {
+    llvm::acc::target::QueueManager->deinit();
+    delete llvm::acc::target::QueueManager;
+    llvm::acc::target::QueueManager = nullptr;
+
+    llvm::acc::target::DM->deinit();
+    delete llvm::acc::target::DM;
+    llvm::acc::target::DM = nullptr;
+  }
+  InitRefCount--;
+
+  deinitRuntime();
+}
+} // namespace
+
+/// 'acc init' entry point: takes a reference on the runtime and registers an
+/// exit handler that drops it again.
+///
+/// \p DeviceType and \p DeviceNum are currently ignored (a warning says so);
+/// all available devices are initialized. A new exit handler is registered on
+/// every call, matching the reference taken by that call.
+EXTERN void __tgt_acc_init(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+                           int64_t DeviceNum) {
+  std::scoped_lock<decltype(InitMutex)> Lock(InitMutex);
+  FUNC_LOGGER(Loc);
+  REPORT_WARN() << "acc init ignores user's request and initializes all "
+                   "available devices.";
+  initAccRuntime();
+  // std::atexit returns non-zero when the handler could not be registered
+  // (the number of handlers is limited). Report that instead of silently
+  // leaving the reference taken above unreleased.
+  const int AtExitStatus = std::atexit([]() {
+    std::scoped_lock<decltype(InitMutex)> Lock(InitMutex);
+    FUNC_LOGGER();
+    deinitAccRuntime();
+  });
+  if (AtExitStatus != 0) {
+    REPORT_WARN() << "Failed to register the exit handler; the OpenACC "
+                     "runtime will not be deinitialized at exit.";
+  }
+}
+
+/// 'acc shutdown' entry point. The request is not acted upon: the function
+/// only takes the init lock, logs the call and emits a warning.
+EXTERN void __tgt_acc_shutdown(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+                               int64_t DeviceNum) {
+  std::lock_guard<std::mutex> Guard(InitMutex);
+  FUNC_LOGGER(Loc);
+  REPORT_WARN() << "acc shutdown is ignored.";
+}
+
+/// Registers the offload image described by \p Desc.
+///
+/// Takes a reference on the runtime first. If the plugin manager chooses to
+/// delay the registration nothing else happens here; otherwise the library is
+/// registered and the device mapping is refreshed.
+EXTERN void __tgt_acc_register_lib(__tgt_bin_desc *Desc) {
+  std::lock_guard<std::mutex> Guard(InitMutex);
+  FUNC_LOGGER();
+  initAccRuntime();
+  const bool Delayed = PM->delayRegisterLib(__tgt_acc_register_lib, Desc);
+  if (!Delayed) {
+    PM->registerLib(Desc);
+    llvm::acc::target::DM->refreshDeviceMapping(/*UpdateDeviceType=*/true);
+  }
+}
+
+/// Unregisters the offload image described by \p Desc and then drops one
+/// reference on the runtime.
+EXTERN void __tgt_acc_unregister_lib(__tgt_bin_desc *Desc) {
+  std::lock_guard<std::mutex> Guard(InitMutex);
+  FUNC_LOGGER();
+
+  PM->unregisterLib(Desc);
+  deinitAccRuntime();
+}
diff --git a/offload/libacctarget/Interface.h b/offload/libacctarget/Interface.h
new file mode 100644
index 0000000000000..d65055d95f590
--- /dev/null
+++ b/offload/libacctarget/Interface.h
@@ -0,0 +1,270 @@
+//===- Interface.h ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __TGT_ACC_TARGET_H__
+#define __TGT_ACC_TARGET_H__
+
+#ifdef __cplusplus
+#include "flang-rt/runtime/descriptor.h"
+using namespace Fortran::ISO;
+#else
+#include "flang/ISO_Fortran_binding.h"
+#endif
+
+#include "Shared/APITypes.h"
+#include "Shared/SourceInfo.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+// Portable alignment attribute for C89/C99 compatibility.
+// ACC_ALIGNED(x) requests x-byte alignment for the declaration it precedes,
+// using __declspec(align) on MSVC and __attribute__((aligned)) on GCC/Clang.
+// On any other compiler it expands to nothing, so no alignment is requested
+// there.
+#if defined(_MSC_VER)
+#define ACC_ALIGNED(x) __declspec(align(x))
+#elif defined(__GNUC__) || defined(__clang__)
+#define ACC_ALIGNED(x) __attribute__((aligned(x)))
+#else
+#define ACC_ALIGNED(x)
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//===----------------------------------------------------------------------===//
+// OpenACC Target Offload Types and Enums
+//===----------------------------------------------------------------------===//
+
+// Map-type flags describing how a single argument is handled. Every non-zero
+// enumerator is a distinct bit, so the values can be OR-ed together in one
+// `ArgTypes` entry and tested with `&`. Bits 0x4, 0x20, 0x40, 0x200 and
+// 0x1000 are not assigned here.
+enum {
+  // no flags
+  TGT_ACC_MAPTYPE_NONE = 0x0,
+  // copy data from host to device
+  TGT_ACC_MAPTYPE_TO = 0x1, // enter data
+  // copy data from device to host
+  TGT_ACC_MAPTYPE_FROM = 0x2, // exit data
+  // force unmapping of data
+  TGT_ACC_MAPTYPE_FINALIZE = 0x8,
+  // map the pointer as well as the pointee
+  TGT_ACC_MAPTYPE_PTR_AND_OBJ = 0x10,
+  // private variable - not mapped
+  TGT_ACC_MAPTYPE_PRIVATE = 0x80,
+  // copy by value - not mapped
+  TGT_ACC_MAPTYPE_LITERAL = 0x100,
+  // device pointer - already mapped
+  TGT_ACC_MAPTYPE_DEVPTR = 0x400,
+  // managed device pointer
+  // NOTE(review): how this differs from DEVPTR is not visible here.
+  TGT_ACC_MAPTYPE_MANAGED_DEVPTR = 0x800,
+  // present or don't create
+  TGT_ACC_MAPTYPE_NO_CREATE = 0x2000,
+  // private variable - gang
+  TGT_ACC_MAPTYPE_GANG_PRIVATE = 0x4000,
+  // private variable - worker
+  TGT_ACC_MAPTYPE_WORKER_PRIVATE = 0x8000,
+  // private variable - vector
+  TGT_ACC_MAPTYPE_VECTOR_PRIVATE = 0x10000,
+  // zero modifier
+  TGT_ACC_MAPTYPE_INIT_ZERO = 0x20000,
+  // device resident memory - persistent allocation
+  TGT_ACC_MAPTYPE_DEVICE_RESIDENT = 0x40000,
+  // if_present: do nothing when the data is not present on the device
+  TGT_ACC_MAPTYPE_IF_PRESENT = 0x80000,
+  // present clause: skip attach/detach to preserve user-managed pointers
+  TGT_ACC_MAPTYPE_PRESENT = 0x100000,
+};
+
+/// Array descriptor types.
+/// Each value names one of the descriptor layouts declared below
+/// (AccDataDescGeneric, AccDataDescF18, AccDataDescMemRef,
+/// AccDataDescOpenACC).
+/// NOTE(review): presumably this is the value stored in AccDataDesc::Version
+/// to select the concrete layout - confirm against the descriptor parser.
+enum {
+  TGT_ACC_DESC_GENERIC = 0,     // Generic type descriptor.
+  TGT_ACC_DESC_F18 = 1,         // Fortran 2018 type descriptor.
+  TGT_ACC_DESC_MEMREF = 2,      // MemRef type descriptor.
+  TGT_ACC_DESC_OPENACC = 0x1000 // OpenACC descriptor.
+};
+
+/// Device pointer type.
+typedef uintptr_t tgt_acc_devptr_t;
+
+/// Type descriptor base struct.
+/// Every concrete descriptor below starts with this struct as its first
+/// member (`Base`), so a pointer to any of them can be passed as an
+/// `AccDataDesc *`.
+typedef struct {
+  // Version of the descriptor.
+  int32_t Version;
+} AccDataDesc;
+
+/// Generic type descriptor.
+/// Carries no information beyond the common base.
+typedef struct {
+  AccDataDesc Base;
+} AccDataDescGeneric;
+
+/// F18 type descriptor.
+/// Wraps a pointer to a Fortran CFI descriptor.
+typedef struct {
+  AccDataDesc Base;
+  CFI_cdesc_t *FortranDescriptor;
+} AccDataDescF18;
+
+/// The structure defined by LLVMTypeConverter::getMemRefDescriptorFields.
+typedef struct {
+  void *allocatedPtr;
+  void *alignedPtr;
+  uint64_t offset;
+
+  // Placeholder for the start of the variable-length tail; see below.
+  uint64_t sizes[1];
+// Below are the real fields in the struct where Rank is a compile-time
+// constant. We use offsets from the above sizes to obtain the addresses of
+// the sizes and strides arrays.
+#if 0
+  uint64_t sizes[Rank];
+  uint64_t strides[Rank];
+#endif
+} MemRefDesc;
+
+/// MemRef type descriptor.
+/// `Rank` gives the number of entries in the sizes and strides arrays of the
+/// referenced MemRefDesc.
+typedef struct {
+  AccDataDesc Base;
+  unsigned char Rank;
+  uint64_t ElementSize;
+  MemRefDesc *MemRefDescriptor;
+} AccDataDescMemRef;
+
+/// OpenACC descriptor.
+/// NOTE(review): the pointer members presumably each reference `Rank`
+/// entries, one per dimension - confirm against the code that fills this
+/// descriptor.
+typedef struct {
+  AccDataDesc Base;
+  // Rank is explicitly aligned to 8 bytes where ACC_ALIGNED is supported.
+  ACC_ALIGNED(8) unsigned char Rank;
+  int64_t ElementSize;
+  int64_t *LowerBounds;
+  int64_t *UpperBounds;
+  int64_t *Extents;
+  int64_t *StridesInBytes;
+  int64_t *StartIndices;
+} AccDataDescOpenACC;
+
+/// This struct contains all of the arguments to a target kernel region launch.
+/// The per-argument arrays are parallel: entry I of each array describes
+/// argument I, and each array has ArgNum entries (ArgNames may be null).
+typedef struct {
+  // Version of this struct for ABI compatibility.
+  uint32_t Version;
+  // Number of arguments in each input pointer.
+  uint32_t ArgNum;
+  // Base pointer of each argument (e.g. a struct).
+  void **ArgBasePtrs;
+  // Pointer to the argument data.
+  void **ArgPtrs;
+  // Size of the argument data in bytes.
+  int64_t *ArgSizes;
+  // Type of the data (e.g. to / from); a mask of TGT_ACC_MAPTYPE_* flags.
+  int64_t *ArgTypes;
+  // Name of the data for debugging, possibly null.
+  char **ArgNames;
+  // User-defined mappers (e.g. C++ copy ctors), possibly null.
+  void **ArgMappers;
+  // Type descriptors.
+  AccDataDesc **ArgDescs;
+  // Loop tripcount.
+  uint64_t Tripcount;
+  // Values of the num_gangs clause, in three dimensions.
+  // __tgt_acc_kernel uses these as the three block counts of the launch.
+  int64_t NumGangs[3];
+  // Value of the num_workers clause.
+  // __tgt_acc_kernel uses this as the second thread limit of the launch.
+  int64_t NumWorkers;
+  // Value of the vector_length clause.
+  // __tgt_acc_kernel uses this as the first thread limit of the launch.
+  int64_t VectorLength;
+  // Size of shared memory.
+  // __tgt_acc_kernel passes this on as the dynamic group memory size.
+  int64_t SmemSize;
+} AccKernelArgsTy;
+
+//===----------------------------------------------------------------------===//
+// OpenACC Target Offload Runtime Compiler Interface API
+//===----------------------------------------------------------------------===//
+
+/// adds a target shared library to the target execution image
+void __tgt_acc_register_lib(__tgt_bin_desc *Desc);
+
+/// removes a target shared library from the target execution image
+void __tgt_acc_unregister_lib(__tgt_bin_desc *Desc);
+
+/// 'acc init' directive
+void __tgt_acc_init(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+                    int64_t DeviceNum);
+
+/// 'acc shutdown' directive
+void __tgt_acc_shutdown(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+                        int64_t DeviceNum);
+
+/// 'acc declare' directive
+void __tgt_acc_declare(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+                       uint32_t ArgNum, void **ArgBasePtrs, void **ArgPtrs,
+                       int64_t *ArgSizes, int64_t *ArgTypes, char **ArgNames,
+                       void **ArgMappers, AccDataDesc **ArgDescs, int64_t Async,
+                       __tgt_bin_desc *Desc);
+
+/// 'acc enter data' directive
+void __tgt_acc_data_enter(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+                          uint32_t ArgNum, void **ArgBasePtrs, void **ArgPtrs,
+                          int64_t *ArgSizes, int64_t *ArgTypes, char **ArgNames,
+                          void **ArgMappers, AccDataDesc **ArgDescs,
+                          int64_t Async);
+
+/// 'acc exit data' directive
+void __tgt_acc_data_exit(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+                         uint32_t ArgNum, void **ArgBasePtrs, void **ArgPtrs,
+                         int64_t *ArgSizes, int64_t *ArgTypes, char **ArgNames,
+                         void **ArgMappers, AccDataDesc **ArgDescs,
+                         int64_t Async);
+
+/// 'acc update' directive
+void __tgt_acc_data_update(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+                           uint32_t ArgNum, void **ArgBasePtrs, void **ArgPtrs,
+                           int64_t *ArgSizes, int64_t *ArgTypes,
+                           char **ArgNames, void **ArgMappers,
+                           AccDataDesc **ArgDescs, int64_t Async);
+
+/// data mapping begin (for `acc data` construct or compute construct)
+void __tgt_acc_data_begin(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+                          uint32_t ArgNum, void **ArgBasePtrs, void **ArgPtrs,
+                          int64_t *ArgSizes, int64_t *ArgTypes, char **ArgNames,
+                          void **ArgMappers, AccDataDesc **ArgDescs,
+                          int64_t Async);
+
+/// data mapping end (for `acc data` construct or compute construct)
+void __tgt_acc_data_end(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+                        uint32_t ArgNum, void **ArgBasePtrs, void **ArgPtrs,
+                        int64_t *ArgSizes, int64_t *ArgTypes, char **ArgNames,
+                        void **ArgMappers, AccDataDesc **ArgDescs,
+                        int64_t Async);
+
+/// compute construct directive
+int __tgt_acc_kernel(ident_t *Loc, void *Kernel, int64_t Flags,
+                     int64_t DeviceType, AccKernelArgsTy *Args, int64_t Async,
+                     const char *KernelName, __tgt_bin_desc *Desc);
+
+/// 'acc wait' directive
+int __tgt_acc_wait(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+                   int32_t DeviceNum, uint32_t WaitNum, int64_t *WaitList,
+                   int64_t Async);
+
+/// `acc host_data use_device` directive
+void *__tgt_acc_get_deviceptr(ident_t *Loc, void *BasePtr, int64_t Flags,
+                              void *HostPtr);
+
+/// 'acc set default_async' directive
+void __tgt_acc_set_default_async(ident_t *Loc, int64_t Async);
+
+/// 'acc set device_num' directive
+void __tgt_acc_set_device_num(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+                              int64_t DeviceNum);
+
+/// 'acc set device_type' directive
+void __tgt_acc_set_device_type(ident_t *Loc, int64_t Flags, int64_t DeviceType);
+
+/// Mirror allocation for declare action recipes
+void __tgt_acc_mirror_alloc(ident_t *Loc, int64_t Flags, int64_t DeviceType,
+                            uint32_t ArgNum, void **ArgBasePtrs, void **ArgPtrs,
+                            int64_t *ArgSizes, int64_t *ArgTypes,
+                            char **ArgNames, void **ArgMappers,
+                            AccDataDesc **ArgDescs);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __TGT_ACC_TARGET_H__
diff --git a/offload/libacctarget/Logger.h b/offload/libacctarget/Logger.h
new file mode 100644
index 0000000000000..7ffc9ae8e2bed
--- /dev/null
+++ b/offload/libacctarget/Logger.h
@@ -0,0 +1,47 @@
+//===- Logger.h -------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ACC_OFFLOAD_LOGGER_H_
+#define LLVM_ACC_OFFLOAD_LOGGER_H_
+
+#include "Shared/Debug.h"
+#include "Shared/SourceInfo.h"
+#include <optional>
+
+namespace llvm::acc::target::debug {
+inline std::string formatLoc(ident_t *Loc) { // yields "<filename>:<line>"
+  SourceInfo SI(Loc);
+  return std::string(SI.getFilename()) + ":" + std::to_string(SI.getLine());
+}
+// Streams Loc in the "<filename>:<line>" form produced by formatLoc().
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, ident_t *Loc) {
+  return OS << formatLoc(Loc);
+}
+struct ScopeLoggerTy { // ODBG()s "> scope" when built, "< scope" when destroyed
+  const char *ScopeName;
+  std::optional<ident_t *> Loc = std::nullopt; // unset by the 1-arg ctor
+  ScopeLoggerTy(const char *ScopeName, ident_t *Loc)
+      : ScopeName(ScopeName), Loc(Loc) {
+    ODBG() << "> " << ScopeName << "(" << Loc << ")";
+  }
+  ScopeLoggerTy(const char *ScopeName) : ScopeName(ScopeName) {
+    ODBG() << "> " << ScopeName;
+  }
+  ~ScopeLoggerTy() {
+    if (Loc) // a location was supplied at construction, so print it again
+      ODBG() << "< " << ScopeName << "(" << *Loc << ")";
+    else
+      ODBG() << "< " << ScopeName;
+  }
+};
+} // namespace llvm::acc::target::debug
+
+#define FUNC_LOGGER(...)                                                       \
+  ScopeLoggerTy FunctionScopeLogger(__FUNCTION__, ##__VA_ARGS__)
+
+#endif // LLVM_ACC_OFFLOAD_LOGGER_H_
diff --git a/offload/libacctarget/Private.h b/offload/libacctarget/Private.h
new file mode 100644
index 0000000000000..c7d15aa23c189
--- /dev/null
+++ b/offload/libacctarget/Private.h
@@ -0,0 +1,29 @@
+//===- Private.h ------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _OPENACC_PRIVATE_H_
+#define _OPENACC_PRIVATE_H_
+
+#include <cstddef>
+#include <cstdint>
+
+namespace llvm::acc::target {
+int accIsPresent(void *Ptr);
+void *accAlloc(size_t Size);
+void accFree(void *DevicePtr);
+void accMemcpyFromDevice(void *Dst, void *Src, size_t Bytes);
+void accMemcpyToDevice(void *Dst, void *Src, size_t Bytes);
+void accMemcpyD2D(void *Dst, void *Src, size_t Bytes, int DstDev, int SrcDev);
+void accMapData(void *Hst, void *Dev, size_t Bytes);
+void accUnmapData(void *Hst);
+
+void *accDataEnter(void *ArgBasePtr, void *ArgPtr, int64_t ArgSize,
+                   int64_t ArgType, int64_t Async);
+} // namespace llvm::acc::target
+
+#endif // _OPENACC_PRIVATE_H_
diff --git a/offload/libacctarget/QueueManager.cpp b/offload/libacctarget/QueueManager.cpp
new file mode 100644
index 0000000000000..e9fa4a82cc559
--- /dev/null
+++ b/offload/libacctarget/QueueManager.cpp
@@ -0,0 +1,179 @@
+//===- QueueManager.cpp -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "QueueManager.h"
+#include "PluginManager.h"
+#include "Private.h"
+#include "Shared/Debug.h"
+
+namespace llvm::acc::target {
+QueueManagerTy *QueueManager = nullptr;
+} // namespace llvm::acc::target
+
+using namespace llvm::acc::target;
+
+static void synchronizeQueueOrDie(QueueIdTy Queue, DeviceTy &Device,
+                                  AsyncInfoTy *AsyncInfo) {
+  // Synchronizes AsyncInfo; any result but OFFLOAD_SUCCESS is reported fatal.
+  ODBG() << "Synchronizing stream " << AsyncInfo << " for device "
+         << Device.DeviceID << " " << &Device << " with queue ID " << Queue;
+  auto Res = AsyncInfo->synchronize();
+  if (Res != OFFLOAD_SUCCESS) // QueueIdTy is uint32_t, hence PRIu32 below
+    FATAL_MESSAGE(Device.DeviceID,
+                  "Failed to synchronize queue %" PRIu32 " on device %d", Queue,
+                  Device.DeviceID);
+}
+
+static QueueManagerTy::StatusTy
+queryQueueOrDie(QueueIdTy Queue, DeviceTy &Device, AsyncInfoTy *AsyncInfo) {
+  // Queries AsyncInfo; OFFLOAD_FAIL is fatal, any other result is cast to
+  ODBG() << "Querying stream " << AsyncInfo << " for device " << Device.DeviceID
+         << " " << &Device << " with queue ID " << Queue; // ... StatusTy.
+  auto Res = AsyncInfo->query();
+  if (Res == OFFLOAD_FAIL) // QueueIdTy is uint32_t, hence PRIu32 below
+    FATAL_MESSAGE(Device.DeviceID,
+                  "Failed to query queue %" PRIu32 " on device %d", Queue,
+                  Device.DeviceID);
+  return static_cast<QueueManagerTy::StatusTy>(Res);
+}
+
+QueueManagerTy::QueueManagerTy() {}
+
+QueueManagerTy::~QueueManagerTy() {}
+
+void QueueManagerTy::synchronize() {
+  // Every entry of QueueMap is synchronized, whatever device it belongs to;
+  // a failing queue is reported by synchronizeQueueOrDie().
+  for (auto &[DevAndId, Info] : QueueMap)
+    synchronizeQueueOrDie(DevAndId.second, *DevAndId.first, Info.get());
+}
+
+void QueueManagerTy::synchronize(DeviceTy &Device) {
+  // Synchronize the queues keyed by this exact DeviceTy object; entries that
+  // belong to any other device are left untouched.
+  for (auto &[DevAndId, Info] : QueueMap) {
+    if (DevAndId.first == &Device)
+      synchronizeQueueOrDie(DevAndId.second, Device, Info.get());
+  }
+}
+
+void QueueManagerTy::synchronize(DeviceTy &Device, QueueIdTy Queue) {
+  // Resolve the queue through this instance, not the QueueManager global.
+  synchronizeQueueOrDie(Queue, Device, get(Device, Queue));
+}
+
+QueueManagerTy::StatusTy QueueManagerTy::query() {
+  // Stop at the first queue that is not ready; READY means none was found.
+  for (auto &[DevAndId, Info] : QueueMap)
+    if (queryQueueOrDie(DevAndId.second, *DevAndId.first, Info.get()) ==
+        StatusTy::NOT_READY)
+      return StatusTy::NOT_READY;
+  return StatusTy::READY;
+}
+
+QueueManagerTy::StatusTy QueueManagerTy::query(DeviceTy &Device) {
+  // Only queues keyed by this DeviceTy object are queried.
+  for (auto &[DevAndId, Info] : QueueMap) {
+    if (DevAndId.first == &Device &&
+        queryQueueOrDie(DevAndId.second, Device, Info.get()) ==
+            StatusTy::NOT_READY)
+      return StatusTy::NOT_READY;
+  }
+  return StatusTy::READY;
+}
+
+QueueManagerTy::StatusTy QueueManagerTy::query(DeviceTy &Device,
+                                               QueueIdTy Queue) {
+  // Resolve the queue through this instance, not the QueueManager global.
+  return queryQueueOrDie(Queue, Device, get(Device, Queue));
+}
+
+AsyncInfoTy *QueueManagerTy::get(DeviceTy &Device, QueueIdTy QueueId) {
+  static std::mutex Mutex; // NOTE(review): not taken by synchronize()/query()
+  std::lock_guard<std::mutex> G(Mutex);
+  // Find the (Device, QueueId) entry, creating its AsyncInfoTy on first use.
+  auto Insertion = QueueMap.insert({std::make_pair(&Device, QueueId), nullptr});
+  if (Insertion.second) {
+    Insertion.first->second = std::make_unique<AsyncInfoTy>(
+        Device, AsyncInfoTy::SyncTy::STATIC_NON_BLOCKING);
+    ODBG() << "Initialized new stream for device " << &Device << " id "
+           << QueueId << " -> " << Insertion.first->second.get();
+  }
+  return Insertion.first->second.get();
+}
+
+namespace llvm::acc::target {
+void accAsyncWait(ident_t *Loc, int64_t DeviceId, int64_t WaitArg) {
+  int64_t WaitArgs[] = {WaitArg};
+  accAsyncWait(Loc, DeviceId, 1, WaitArgs);
+}
+
+void accAsyncWait(ident_t *Loc, int64_t DeviceId, uint32_t WaitNum,
+                  int64_t *WaitList) {
+  ODBG() << "Synchronizing streams for device " << DeviceId;
+
+  auto DeviceOrErr = PM->getDevice(DeviceId);
+  if (!DeviceOrErr)
+    FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
+
+  // An empty list means "all queues of the device"; otherwise synchronize
+  // each listed queue in order.
+  if (WaitNum == 0)
+    return QueueManager->synchronize(*DeviceOrErr);
+  for (uint32_t Idx = 0; Idx != WaitNum; ++Idx)
+    QueueManager->synchronize(*DeviceOrErr, WaitList[Idx]);
+}
+
+void accAsyncWaitAll(ident_t *Loc, int64_t DeviceId) {
+  ODBG() << "Synchronizing all streams for device " << DeviceId;
+
+  auto DeviceOrErr = PM->getDevice(DeviceId);
+  if (!DeviceOrErr)
+    FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
+  QueueManager->synchronize(*DeviceOrErr);
+}
+
+void accAsyncWaitAll(ident_t *Loc) {
+  ODBG() << "Synchronizing all streams";
+  QueueManager->synchronize();
+}
+
+int accAsyncTest(ident_t *Loc, int64_t DeviceId, int64_t TestArg) {
+  int64_t TestList[] = {TestArg};
+  return accAsyncTest(Loc, DeviceId, 1, TestList);
+}
+
+int accAsyncTest(ident_t *Loc, int64_t DeviceId, uint32_t TestNum,
+                 int64_t *TestList) {
+  ODBG() << "Querying streams for device " << DeviceId;
+  auto DeviceOrErr = PM->getDevice(DeviceId);
+  if (!DeviceOrErr)
+    FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
+
+  // NOT_READY as soon as one listed queue is not ready; READY otherwise.
+  using StatusTy = QueueManagerTy::StatusTy;
+  for (uint32_t Idx = 0; Idx != TestNum; ++Idx)
+    if (QueueManager->query(*DeviceOrErr, TestList[Idx]) == StatusTy::NOT_READY)
+      return static_cast<int>(StatusTy::NOT_READY);
+  return static_cast<int>(StatusTy::READY);
+}
+
+int accAsyncTestAll(ident_t *Loc, int64_t DeviceId) {
+  ODBG() << "Querying all streams for device " << DeviceId;
+
+  auto DeviceOrErr = PM->getDevice(DeviceId);
+  if (!DeviceOrErr)
+    FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
+  return static_cast<int>(QueueManager->query(*DeviceOrErr));
+}
+
+int accAsyncTestAll(ident_t *Loc) {
+  ODBG() << "Querying all streams";
+  return static_cast<int>(QueueManager->query());
+}
+} // namespace llvm::acc::target
diff --git a/offload/libacctarget/QueueManager.h b/offload/libacctarget/QueueManager.h
new file mode 100644
index 0000000000000..13d3ce96122ed
--- /dev/null
+++ b/offload/libacctarget/QueueManager.h
@@ -0,0 +1,77 @@
+//===- QueueManager.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _QUEUEMANAGER_H_
+#define _QUEUEMANAGER_H_
+
+#include "omptarget.h"
+
+namespace llvm::acc::target {
+
+using QueueIdTy = uint32_t;
+
+class QueueManagerTy {
+public:
+  QueueManagerTy();
+  ~QueueManagerTy();
+  // init()/deinit() currently have empty bodies.
+  void init() {}
+  void deinit() {}
+  // Result type of the query() overloads.
+  enum class StatusTy { READY = 0, NOT_READY = 1 };
+  // Returns the queue for (Device, QueueId), creating it on first request.
+  AsyncInfoTy *get(DeviceTy &Device, QueueIdTy QueueId);
+  // Synchronize one queue, all queues of Device, or every known queue.
+  void synchronize(DeviceTy &Device, QueueIdTy Queue);
+  void synchronize(DeviceTy &Device);
+  void synchronize();
+  // Same scopes as synchronize(); NOT_READY if any covered queue is not ready.
+  StatusTy query(DeviceTy &Device, QueueIdTy Queue);
+  StatusTy query(DeviceTy &Device);
+  StatusTy query();
+
+private:
+  std::map<std::pair<DeviceTy *, QueueIdTy>, std::unique_ptr<AsyncInfoTy>>
+      QueueMap; // owns one AsyncInfoTy per (device, queue id) key
+};
+
+extern QueueManagerTy *QueueManager;
+
+// Non-owning view of the queue that QueueManager->get() returns for
+// (Device, QueueId); destroying the wrapper leaves the queue untouched.
+class QueueAsyncInfoWrapperTy {
+  AsyncInfoTy *AsyncInfo;
+
+public:
+  QueueAsyncInfoWrapperTy(DeviceTy &Device, QueueIdTy QueueId)
+      : AsyncInfo(QueueManager->get(Device, QueueId)) {}
+
+  ~QueueAsyncInfoWrapperTy() {}
+  operator AsyncInfoTy &() { return *AsyncInfo; }
+};
+
+// Synchronize one queue, a list of queues, or all queues. The Test variants
+// query instead and return a QueueManagerTy::StatusTy value converted to int.
+void accAsyncWait(ident_t *Loc, int64_t DeviceId, int64_t WaitArg);
+void accAsyncWait(ident_t *Loc, int64_t DeviceId, uint32_t WaitNum,
+                  int64_t *WaitList);
+void accAsyncWaitAll(ident_t *Loc, int64_t DeviceId);
+void accAsyncWaitAll(ident_t *Loc);
+int accAsyncTest(ident_t *Loc, int64_t DeviceId, int64_t TestArg);
+int accAsyncTest(ident_t *Loc, int64_t DeviceId, uint32_t TestNum,
+                 int64_t *TestList);
+int accAsyncTestAll(ident_t *Loc, int64_t DeviceId);
+int accAsyncTestAll(ident_t *Loc);
+} // namespace llvm::acc::target
+
+namespace llvm::acc::target::icv {
+// acc-default-async-var
+extern thread_local int32_t AccDefaultAsyncVar;
+} // namespace llvm::acc::target::icv
+
+#endif // _QUEUEMANAGER_H_
diff --git a/offload/libacctarget/RuntimeImpl.cpp b/offload/libacctarget/RuntimeImpl.cpp
new file mode 100644
index 0000000000000..fa645c29fa713
--- /dev/null
+++ b/offload/libacctarget/RuntimeImpl.cpp
@@ -0,0 +1,175 @@
+//===- RuntimeImpl.cpp ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Debug.h"
+#include "DeviceManager.h"
+#include "Logger.h"
+#include "PluginManager.h"
+#include "Private.h"
+#include "Shared/Debug.h"
+
+using namespace llvm::acc::target::debug;
+
+namespace llvm::acc::target {
+int accIsPresent(void *Ptr) {
+  FUNC_LOGGER();
+  ODBG(ADT_Interface) << "Address " << Ptr;
+
+  auto DeviceOrErr = DM->getDevice();
+  if (!DeviceOrErr)
+    REPORT_FATAL() << toString(DeviceOrErr.takeError()).c_str();
+
+  TargetPointerResultTy TPR =
+      DeviceOrErr->getMappingInfo().getTgtPtrBegin(const_cast<void *>(Ptr), 1,
+                                                   /*UpdateRefCount=*/false,
+                                                   /*UseHoldRefCount=*/false);
+  int Rc = TPR.isPresent();
+  ODBG(ADT_Interface) << "Result " << Rc;
+  return Rc;
+}
+
+// Allocates Size bytes with allocData() on the device from DM->getDevice().
+void *accAlloc(size_t Size) {
+  FUNC_LOGGER();
+  ODBG(ADT_Interface) << "Allocating " << Size << " bytes";
+  // Size is unsigned, so zero is the only "non-positive" value.
+  if (Size == 0) {
+    ODBG(ADT_Interface) << "Non-positive length";
+    return nullptr;
+  }
+
+  auto DeviceOrErr = DM->getDevice();
+  if (!DeviceOrErr)
+    REPORT_FATAL() << toString(DeviceOrErr.takeError()).c_str();
+
+  void *DevicePtr = DeviceOrErr->allocData(Size, nullptr);
+  ODBG(ADT_Interface) << "Device ptr " << DevicePtr;
+  return DevicePtr;
+}
+
+void accFree(void *DevicePtr) {
+  FUNC_LOGGER();
+  ODBG(ADT_Interface) << "Address " << DevicePtr;
+  auto DeviceOrErr = DM->getDevice();
+  if (!DeviceOrErr)
+    REPORT_FATAL() << toString(DeviceOrErr.takeError()).c_str();
+
+  if (DeviceOrErr->deleteData(DevicePtr) == OFFLOAD_FAIL)
+    REPORT_FATAL() << "Failed to deallocate device ptr. Set "
+                      "OFFLOAD_TRACK_ALLOCATION_TRACES=1 to track allocations.";
+}
+
+void accMemcpyToDevice(void *Dst, void *Src, size_t Bytes) {
+  FUNC_LOGGER();
+  ODBG(ADT_Interface) << Dst << " <- " << Src << ", " << Bytes << " bytes";
+  // A zero-length copy is a no-op, even when a pointer is null.
+  if (Bytes == 0) {
+    ODBG(ADT_Interface) << "Zero bytes, nothing to do";
+    return;
+  }
+  if (!Dst || !Src) {
+    REPORT() << "Invalid arguments";
+    return;
+  }
+  // submitData() on the device DM->getDevice() returns, via a local AsyncInfo.
+  auto DeviceOrErr = DM->getDevice();
+  if (!DeviceOrErr)
+    REPORT_FATAL() << toString(DeviceOrErr.takeError()).c_str();
+  AsyncInfoTy AsyncInfo(*DeviceOrErr);
+  int Status = DeviceOrErr->submitData(Dst, Src, Bytes, AsyncInfo);
+  ODBG(ADT_Interface) << "Result " << Status;
+}
+
+void accMemcpyFromDevice(void *Dst, void *Src, size_t Bytes) {
+  FUNC_LOGGER();
+  ODBG(ADT_Interface) << Dst << " <- " << Src << ", " << Bytes << " bytes";
+  // A zero-length copy is a no-op, even when a pointer is null.
+  if (Bytes == 0) {
+    ODBG(ADT_Interface) << "Zero bytes, nothing to do";
+    return;
+  }
+  if (!Dst || !Src) {
+    REPORT() << "Invalid arguments";
+    return;
+  }
+  // retrieveData() on the device DM->getDevice() returns, via a local AsyncInfo.
+  auto DeviceOrErr = DM->getDevice();
+  if (!DeviceOrErr)
+    REPORT_FATAL() << toString(DeviceOrErr.takeError()).c_str();
+  AsyncInfoTy AsyncInfo(*DeviceOrErr);
+  int Status = DeviceOrErr->retrieveData(Dst, Src, Bytes, AsyncInfo);
+  ODBG(ADT_Interface) << "Result " << Status;
+}
+
+void accMemcpyD2D(void *Dst, void *Src, size_t Bytes, int DstDevice,
+                  int SrcDevice) {
+  FUNC_LOGGER();
+  ODBG(ADT_Interface) << Dst << " <- " << Src << ", " << Bytes << " bytes";
+  // Zero-length copies return after a debug message; null pointers are reported.
+  if (!Dst || !Src || Bytes <= 0) {
+    if (Bytes == 0) {
+      ODBG(ADT_Interface) << "Zero bytes, nothing to do";
+      return;
+    }
+    REPORT() << "Invalid arguments";
+    return;
+  }
+  // FIXME: DstDevice and SrcDevice are unused; both sides use DM->getDevice().
+  auto DstDeviceOrErr = DM->getDevice();
+  if (!DstDeviceOrErr)
+    REPORT_FATAL() << toString(DstDeviceOrErr.takeError()).c_str();
+  auto SrcDeviceOrErr = DM->getDevice();
+  if (!SrcDeviceOrErr)
+    REPORT_FATAL() << toString(SrcDeviceOrErr.takeError()).c_str();
+  if (!SrcDeviceOrErr->isDataExchangable(*DstDeviceOrErr)) {
+    REPORT() << "D2D not allowed for current device type";
+    return;
+  }
+
+  AsyncInfoTy AsyncInfo(*SrcDeviceOrErr);
+  int Rc =
+      SrcDeviceOrErr->dataExchange(Src, *DstDeviceOrErr, Dst, Bytes, AsyncInfo);
+  ODBG(ADT_Interface) << "Result " << Rc;
+}
+
+void accMapData(void *Hst, void *Dev, size_t Bytes) {
+  FUNC_LOGGER();
+  ODBG(ADT_Interface) << Hst << " <-> " << Dev << ", " << Bytes << " bytes";
+
+  if (!Hst || !Dev || Bytes <= 0) {
+    REPORT() << "Invalid arguments";
+    return;
+  }
+
+  auto DeviceOrErr = DM->getDevice();
+  if (!DeviceOrErr)
+    REPORT_FATAL() << toString(DeviceOrErr.takeError()).c_str();
+
+  int Rc = DeviceOrErr->getMappingInfo().associatePtr(
+      const_cast<void *>(Hst), const_cast<void *>(Dev), Bytes);
+  ODBG(ADT_Interface) << "Result " << Rc;
+}
+
+void accUnmapData(void *Hst) {
+  FUNC_LOGGER();
+  ODBG(ADT_Interface) << Hst;
+
+  if (!Hst) {
+    REPORT() << "Invalid arguments";
+    return;
+  }
+
+  auto DeviceOrErr = DM->getDevice();
+  if (!DeviceOrErr)
+    REPORT_FATAL() << toString(DeviceOrErr.takeError()).c_str();
+  int Rc =
+      DeviceOrErr->getMappingInfo().disassociatePtr(const_cast<void *>(Hst));
+  ODBG(ADT_Interface) << "Result " << Rc;
+}
+
+} // namespace llvm::acc::target
diff --git a/offload/libacctarget/RuntimeInterface.cpp b/offload/libacctarget/RuntimeInterface.cpp
new file mode 100644
index 0000000000000..bb379f0adcf24
--- /dev/null
+++ b/offload/libacctarget/RuntimeInterface.cpp
@@ -0,0 +1,248 @@
+//===- RuntimeInterface.cpp -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DeviceManager.h"
+#include "Interface.h"
+#include "Private.h"
+#include "QueueManager.h"
+#include "Shared/Debug.h"
+#include "include/openacc.h"
+
+using namespace llvm::acc::target;
+
+extern "C" {
+int acc_get_num_devices(acc_device_t DevType) {
+  return DM->getNumDevices(DevType);
+}
+int acc_get_num_devices_(acc_device_t *DevType) {
+  return acc_get_num_devices(*DevType);
+}
+
+int acc_get_device_num(acc_device_t DevType) {
+  return DM->getDeviceId(DevType);
+}
+int acc_get_device_num_(acc_device_t *DevType) {
+  return acc_get_device_num(*DevType);
+}
+
+void acc_set_device_num(int DevNum, acc_device_t DevType) {
+  __tgt_acc_set_device_num(nullptr, 0, DevType, DevNum);
+}
+void acc_set_device_num_(int *DevNum, acc_device_t *DevType) {
+  acc_set_device_num(*DevNum, *DevType);
+}
+
+void acc_set_device_type(acc_device_t DevType) {
+  __tgt_acc_set_device_type(nullptr, 0, DevType);
+}
+void acc_set_device_type_(acc_device_t *DevType) {
+  acc_set_device_type(*DevType);
+}
+
+void acc_set_device(acc_device_t DevType) {
+  __tgt_acc_set_device_type(nullptr, 0, DevType);
+}
+void acc_set_device_(acc_device_t *DevType) { acc_set_device(*DevType); }
+
+acc_device_t acc_get_device_type(void) { return DM->getDeviceType(); }
+acc_device_t acc_get_device_type_(void) { return acc_get_device_type(); }
+
+acc_device_t acc_get_device(void) { return DM->getDeviceType(); }
+acc_device_t acc_get_device_(void) { return acc_get_device(); }
+
+size_t acc_get_property(int DevNum, acc_device_t DevType,
+                        acc_device_property_t Prop) {
+  return DM->getDeviceProperty(DevNum, DevType, Prop);
+}
+size_t acc_get_property_(int *DevNum, acc_device_t *DevType,
+                         acc_device_property_t *Prop) {
+  return acc_get_property(*DevNum, *DevType, *Prop);
+}
+
+const char *acc_get_property_string(int DevNum, acc_device_t DevType,
+                                    acc_device_property_t Prop) {
+  return DM->getDevicePropertyString(DevNum, DevType, Prop);
+}
+const char *acc_get_property_string_(int *DevNum, acc_device_t *DevType,
+                                     acc_device_property_t *Prop) {
+  return acc_get_property_string(*DevNum, *DevType, *Prop);
+}
+
+void acc_async_wait(int WaitArg) {
+  accAsyncWait(nullptr, DM->getPMDeviceId(), WaitArg);
+}
+void acc_async_wait_(int *WaitArg) { acc_async_wait(*WaitArg); }
+
+void acc_wait_async(int WaitArg) { // NOTE(review): spec also takes async_arg
+  accAsyncWait(nullptr, DM->getPMDeviceId(), WaitArg);
+}
+void acc_wait_async_(int *WaitArg) { acc_wait_async(*WaitArg); }
+
+void acc_wait(int WaitArg) {
+  accAsyncWait(nullptr, DM->getPMDeviceId(), WaitArg);
+}
+void acc_wait_(int *WaitArg) { acc_wait(*WaitArg); }
+
+void acc_wait_device(int WaitArg, int DevNum) {
+  accAsyncWait(nullptr, DevNum, WaitArg);
+}
+void acc_wait_device_(int *WaitArg, int *DevNum) {
+  acc_wait_device(*WaitArg, *DevNum);
+}
+
+void acc_wait_all_async() { accAsyncWaitAll(nullptr); }
+void acc_wait_all_async_() { acc_wait_all_async(); }
+
+void acc_async_wait_all() { accAsyncWaitAll(nullptr); }
+void acc_async_wait_all_() { acc_async_wait_all(); }
+
+void acc_wait_all() { accAsyncWaitAll(nullptr); }
+void acc_wait_all_() { acc_wait_all(); }
+
+void acc_wait_all_device(int DevNum) { accAsyncWaitAll(nullptr, DevNum); }
+void acc_wait_all_device_(int *DevNum) { acc_wait_all_device(*DevNum); }
+
+int acc_wait_any(int Count, int *WaitNum) {
+  REPORT_FATAL() << "acc_wait_any not yet implemented.";
+  return 0;
+}
+int acc_wait_any_(int *Count, int **WaitNum) {
+  return acc_wait_any(*Count, *WaitNum);
+}
+
+int acc_wait_any_device(int Count, int *WaitNum, int DevNum) {
+  REPORT_FATAL() << "acc_wait_any_device not yet implemented.";
+  return 0;
+}
+int acc_wait_any_device_(int *Count, int **WaitNum, int *DevNum) {
+  return acc_wait_any_device(*Count, *WaitNum, *DevNum);
+}
+
+void acc_set_default_async(int Async) {
+  __tgt_acc_set_default_async(nullptr, Async);
+}
+void acc_set_default_async_(int *Async) { acc_set_default_async(*Async); }
+
+int acc_get_default_async(void) { return icv::AccDefaultAsyncVar; }
+int acc_get_default_async_(void) { return acc_get_default_async(); }
+
+int acc_async_test(int TestArg) { // 1 once the queue reports READY, else 0
+  return !accAsyncTest(nullptr, DM->getPMDeviceId(), TestArg);
+}
+int acc_async_test_(int *WaitArg) { return acc_async_test(*WaitArg); }
+
+int acc_async_test_device(int WaitArg, int DevNum) { // spec: wait_arg first
+  return !accAsyncTest(nullptr, DevNum, WaitArg);
+}
+int acc_async_test_device_(int *WaitArg, int *DevNum) {
+  return acc_async_test_device(*WaitArg, *DevNum);
+}
+
+int acc_async_test_all(void) { return !accAsyncTestAll(nullptr); }
+int acc_async_test_all_(void) { return acc_async_test_all(); }
+
+int acc_async_test_all_device(int DevNum) {
+  return !accAsyncTestAll(nullptr, DevNum);
+}
+int acc_async_test_all_device_(int *DevNum) {
+  return acc_async_test_all_device(*DevNum);
+}
+
+void acc_init(acc_device_t DevType) { __tgt_acc_init(nullptr, 0, DevType, -1); }
+void acc_init_(acc_device_t *DevType) { acc_init(*DevType); }
+
+void acc_init_device(int DevNum, acc_device_t DevType) {
+  __tgt_acc_init(nullptr, 0, DevType, DevNum);
+}
+void acc_init_device_(int *DevNum, acc_device_t *DevType) {
+  acc_init_device(*DevNum, *DevType);
+}
+
+void acc_shutdown(acc_device_t DevType) {
+  __tgt_acc_shutdown(nullptr, 0, DevType, -1);
+}
+void acc_shutdown_(acc_device_t *DevType) { acc_shutdown(*DevType); }
+
+void acc_shutdown_device(int DevNum, acc_device_t DevType) {
+  __tgt_acc_shutdown(nullptr, 0, DevType, DevNum);
+}
+void acc_shutdown_device_(int *DevNum, acc_device_t *DevType) {
+  acc_shutdown_device(*DevNum, *DevType);
+}
+
+void acc_free(void *DataDev) { accFree(DataDev); }
+void acc_free_(void **DataDev) { acc_free(*DataDev); }
+
+void *acc_malloc(size_t Bytes) { return accAlloc(Bytes); }
+void *acc_malloc_(size_t *Bytes) { return acc_malloc(*Bytes); }
+
+void acc_map_data(void *DataArg, void *DataDev, size_t Bytes) {
+  accMapData(DataArg, DataDev, Bytes);
+}
+void acc_map_data_(void **DataArg, void **DataDev, size_t *Bytes) {
+  acc_map_data(*DataArg, *DataDev, *Bytes);
+}
+
+void acc_unmap_data(void *DataArg) { accUnmapData(DataArg); }
+void acc_unmap_data_(void **DataArg) { acc_unmap_data(*DataArg); }
+
+void *acc_deviceptr(void *DataArg) {
+  return __tgt_acc_get_deviceptr(nullptr, DataArg, 0, DataArg);
+}
+void *acc_deviceptr_(void **DataArg) { return acc_deviceptr(*DataArg); }
+
+void *acc_hostptr(void *DataDev) {
+  REPORT_FATAL() << "acc_hostptr not yet implemented";
+  return nullptr;
+}
+void *acc_hostptr_(void **DataDev) { return acc_hostptr(*DataDev); }
+
+void acc_memcpy_from_device(void *DataHostDest, void *DataDevSrc,
+                            size_t Bytes) {
+  accMemcpyFromDevice(DataHostDest, DataDevSrc, Bytes);
+}
+void acc_memcpy_from_device_(void **DataHostDest, void **DataDevSrc,
+                             size_t *Bytes) {
+  acc_memcpy_from_device(*DataHostDest, *DataDevSrc, *Bytes);
+}
+
+void acc_memcpy_to_device(void *DataDevDest, void *DataHostSrc, size_t Bytes) {
+  accMemcpyToDevice(DataDevDest, DataHostSrc, Bytes);
+}
+void acc_memcpy_to_device_(void **DataDevDest, void **DataHostSrc,
+                           size_t *Bytes) {
+  acc_memcpy_to_device(*DataDevDest, *DataHostSrc, *Bytes);
+}
+
+void acc_memcpy_d2d(void *DataDevDest, void *DataHostSrc, size_t Bytes,
+                    int DevNumDest, int DevNumSrc) {
+  accMemcpyD2D(DataDevDest, DataHostSrc, Bytes, DevNumDest, DevNumSrc);
+}
+void acc_memcpy_d2d_(void **DataDevDest, void **DataHostSrc, size_t *Bytes,
+                     int *DevNumDest, int *DevNumSrc) {
+  acc_memcpy_d2d(*DataDevDest, *DataHostSrc, *Bytes, *DevNumDest, *DevNumSrc);
+}
+
+int acc_on_device(acc_device_t DevType) { return DevType == acc_device_host; }
+int acc_on_device_(acc_device_t *DevType) { return acc_on_device(*DevType); }
+
+void acc_present_dump_all() {
+  REPORT_WARN() << "acc_present_dump_all not yet implemented";
+}
+void acc_present_dump_all_() { acc_present_dump_all(); }
+
+void acc_attach_dump_all() {
+  REPORT_WARN() << "acc_attach_dump_all not yet implemented";
+}
+void acc_attach_dump_all_() { acc_attach_dump_all(); }
+
+void acc_attach_dump() {
+  REPORT_WARN() << "acc_attach_dump not yet implemented";
+}
+void acc_attach_dump_() { acc_attach_dump(); }
+}
diff --git a/offload/libacctarget/exports b/offload/libacctarget/exports
new file mode 100644
index 0000000000000..4ce05c2542164
--- /dev/null
+++ b/offload/libacctarget/exports
@@ -0,0 +1,182 @@
+VERS1.0 {
+  global:
+    __tgt_acc_register_lib;
+    __tgt_acc_unregister_lib;
+
+    acc_is_present;
+    _cfi_acc_is_present_a;
+
+    acc_create;
+    acc_pcreate;
+    acc_present_or_create;
+    acc_delete;
+    acc_delete_finalize;
+    acc_copyin;
+    acc_pcopyin;
+    acc_present_or_copyin;
+    acc_copyout;
+    acc_copyout_finalize;
+    acc_update_device;
+    acc_updatein;
+    acc_update_self;
+    acc_update_host;
+    acc_updateout;
+
+    acc_create_async;
+    acc_pcreate_async;
+    acc_present_or_create_async;
+    acc_delete_async;
+    acc_delete_finalize_async;
+    acc_copyin_async;
+    acc_pcopyin_async;
+    acc_present_or_copyin_async;
+    acc_copyout_async;
+    acc_copyout_finalize_async;
+    acc_update_device_async;
+    acc_updatein_async;
+    acc_update_self_async;
+    acc_update_host_async;
+    acc_updateout_async;
+
+    _cfi_acc_create_a;
+    _cfi_acc_pcreate_a;
+    _cfi_acc_present_or_create_a;
+    _cfi_acc_delete_a;
+    _cfi_acc_delete_finalize_a;
+    _cfi_acc_copyin_a;
+    _cfi_acc_pcopyin_a;
+    _cfi_acc_present_or_copyin_a;
+    _cfi_acc_copyout_a;
+    _cfi_acc_copyout_finalize_a;
+    _cfi_acc_update_device_a;
+    _cfi_acc_updatein_a;
+    _cfi_acc_update_self_a;
+    _cfi_acc_update_host_a;
+    _cfi_acc_updateout_a;
+
+    _cfi_acc_create_async_a;
+    _cfi_acc_pcreate_async_a;
+    _cfi_acc_present_or_create_async_a;
+    _cfi_acc_delete_async_a;
+    _cfi_acc_delete_finalize_async_a;
+    _cfi_acc_copyin_async_a;
+    _cfi_acc_pcopyin_async_a;
+    _cfi_acc_present_or_copyin_async_a;
+    _cfi_acc_copyout_async_a;
+    _cfi_acc_copyout_finalize_async_a;
+    _cfi_acc_update_device_async_a;
+    _cfi_acc_updatein_async_a;
+    _cfi_acc_update_self_async_a;
+    _cfi_acc_update_host_async_a;
+    _cfi_acc_updateout_async_a;
+
+    __tgt_acc_declare;
+    __tgt_acc_data_update;
+    __tgt_acc_data_enter;
+    __tgt_acc_data_exit;
+    __tgt_acc_data_begin;
+    __tgt_acc_data_end;
+    __tgt_acc_kernel;
+    __tgt_acc_get_deviceptr;
+    __tgt_acc_set_default_async;
+    __tgt_acc_set_device_num;
+    __tgt_acc_set_device_type;
+    __tgt_acc_wait;
+    __tgt_acc_init;
+    __tgt_acc_shutdown;
+    # NOTE(review): __tgt_acc_mirror_alloc (Interface.h) is not exported.
+    acc_get_num_devices;
+    acc_get_num_devices_;
+    acc_get_device_num;
+    acc_get_device_num_;
+    acc_set_device_num;
+    acc_set_device_num_;
+    acc_set_device_type;
+    acc_set_device_type_;
+    acc_set_device;
+    acc_set_device_;
+    acc_get_device_type;
+    acc_get_device_type_;
+    acc_get_device;
+    acc_get_device_;
+    acc_get_property;
+    acc_get_property_;
+    acc_get_property_string;
+    acc_get_property_string_;
+
+    acc_async_wait;
+    acc_async_wait_;
+    acc_wait_async;
+    acc_wait_async_;
+    acc_wait;
+    acc_wait_;
+    acc_wait_device;
+    acc_wait_device_;
+    acc_wait_all_async;
+    acc_wait_all_async_;
+    acc_async_wait_all;
+    acc_async_wait_all_;
+    acc_wait_all;
+    acc_wait_all_;
+    acc_wait_all_device;
+    acc_wait_all_device_;
+    acc_wait_any;
+    acc_wait_any_;
+    acc_wait_any_device;
+    acc_wait_any_device_;
+
+    acc_async_test;
+    acc_async_test_;
+    acc_async_test_device;
+    acc_async_test_device_;
+    acc_async_test_all;
+    acc_async_test_all_;
+    acc_async_test_all_device;
+    acc_async_test_all_device_;
+
+    acc_init;
+    acc_init_;
+    acc_init_device;
+    acc_init_device_;
+    acc_shutdown;
+    acc_shutdown_;
+    acc_shutdown_device;
+    acc_shutdown_device_;
+
+    acc_set_default_async;
+    acc_set_default_async_;
+    acc_get_default_async;
+    acc_get_default_async_;
+
+    acc_malloc;
+    acc_malloc_;
+    acc_free;
+    acc_free_;
+    acc_map_data;
+    acc_map_data_;
+    acc_unmap_data;
+    acc_unmap_data_;
+    acc_deviceptr;
+    acc_deviceptr_;
+    acc_hostptr;
+    acc_hostptr_;
+
+    acc_memcpy_from_device;
+    acc_memcpy_from_device_;
+    acc_memcpy_to_device;
+    acc_memcpy_to_device_;
+    acc_memcpy_d2d;
+    acc_memcpy_d2d_;
+
+    acc_on_device;
+    acc_on_device_;
+    acc_present_dump_all;
+    acc_present_dump_all_;
+    acc_attach_dump_all;
+    acc_attach_dump_all_;
+    acc_attach_dump;
+    acc_attach_dump_;
+
+  local:
+    *;
+};
diff --git a/offload/libacctarget/include/openacc.h b/offload/libacctarget/include/openacc.h
new file mode 100644
index 0000000000000..8f77d220a8d8e
--- /dev/null
+++ b/offload/libacctarget/include/openacc.h
@@ -0,0 +1,46 @@
+//===- openacc.h ------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ACC_OFFLOAD_INCLUDE_OPENACC_H_
+#define LLVM_ACC_OFFLOAD_INCLUDE_OPENACC_H_
+
+#define acc_async_sync -1
+#define acc_async_default -3
+#define acc_async_noval -4
+
+typedef enum {
+  acc_device_none = 0,
+  acc_device_default = 1,
+  acc_device_host = 2,
+  acc_device_not_host = 3,
+  acc_device_current = 10,
+  // begin/end bracket the concrete types: begin == nvidia, end == spirv + 1.
+  acc_device_concrete_type_begin = 4,
+  acc_device_nvidia = 4,
+  acc_device_amd = 5,
+  acc_device_spirv = 6,
+  acc_device_concrete_type_end = 7,
+
+} acc_device_t;
+
+typedef enum {
+  acc_property_int_begin = 0,
+  acc_property_memory = 0,
+  acc_property_free_memory = 1,
+  acc_property_shared_memory_support = 2,
+  acc_property_int_end = 3,
+  // Each *_begin/*_end pair brackets its group: [0, 3) and [1000, 1003).
+  acc_property_string_begin = 1000,
+  acc_property_name = 1000,
+  acc_property_vendor = 1001,
+  acc_property_driver = 1002,
+  acc_property_string_end = 1003,
+
+} acc_device_property_t;
+
+#endif // LLVM_ACC_OFFLOAD_INCLUDE_OPENACC_H_



More information about the llvm-branch-commits mailing list