[Openmp-commits] [openmp] 0c554a4 - [libomptarget] Move device environment to shared header, remove divergence
Jon Chesterfield via Openmp-commits
openmp-commits at lists.llvm.org
Thu Oct 7 04:04:08 PDT 2021
Author: Jon Chesterfield
Date: 2021-10-07T12:03:48+01:00
New Revision: 0c554a4769f2e21233b687c7427c1a47f7bd375e
URL: https://github.com/llvm/llvm-project/commit/0c554a4769f2e21233b687c7427c1a47f7bd375e
DIFF: https://github.com/llvm/llvm-project/commit/0c554a4769f2e21233b687c7427c1a47f7bd375e.diff
LOG: [libomptarget] Move device environment to shared header, remove divergence
Follow on to D110006, related to D110957
Where implementations have diverged this resolves to match the new DeviceRTL
- replaces definitions of this struct in deviceRTL and plugins with include
- changes the dynamic_shared_size field from D110006 to 32 bits
- handles stdint being unavailable in DeviceRTL
- adds a zero initializer for the field to amdgpu
- moves the extern declaration for deviceRTL to target_interface
(omptarget.h is more natural, but doesn't work due to include order
with debug.h)
- Renames the fields everywhere to match the LLVM format used in DeviceRTL
- Makes debug_level uint32_t everywhere (previously sometimes int32_t)
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D111069
Added:
openmp/libomptarget/include/DeviceEnvironment.h
Modified:
openmp/libomptarget/DeviceRTL/CMakeLists.txt
openmp/libomptarget/DeviceRTL/src/Configuration.cpp
openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
openmp/libomptarget/deviceRTLs/common/debug.h
openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
openmp/libomptarget/deviceRTLs/target_interface.h
openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
openmp/libomptarget/plugins/cuda/src/rtl.cpp
Removed:
openmp/libomptarget/deviceRTLs/common/device_environment.h
################################################################################
diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
index bf2b6b1f8ac4d..da6f34a5ae4ff 100644
--- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt
+++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
@@ -174,6 +174,7 @@ set(bc_flags -S -x c++ -std=c++17
-fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device
-Xclang -target-feature -Xclang +ptx61
-I${include_directory}
+ -I${devicertl_base_directory}/../include
${LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL}
)
diff --git a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
index b725888be2da7..2b6f20fb1732c 100644
--- a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
@@ -12,18 +12,12 @@
//===----------------------------------------------------------------------===//
#include "Configuration.h"
+#include "DeviceEnvironment.h"
#include "State.h"
#include "Types.h"
using namespace _OMP;
-struct DeviceEnvironmentTy {
- uint32_t DebugKind;
- uint32_t NumDevices;
- uint32_t DeviceNum;
- uint64_t DynamicMemSize;
-};
-
#pragma omp declare target
extern uint32_t __omp_rtl_debug_kind;
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
index 6e92634f5420b..3f4c02671aebc 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
@@ -92,7 +92,6 @@ set(h_files
${CMAKE_CURRENT_SOURCE_DIR}/src/amdgcn_interface.h
${CMAKE_CURRENT_SOURCE_DIR}/src/target_impl.h
${devicertl_base_directory}/common/debug.h
- ${devicertl_base_directory}/common/device_environment.h
${devicertl_base_directory}/common/omptarget.h
${devicertl_base_directory}/common/omptargeti.h
${devicertl_base_directory}/common/state-queue.h
@@ -137,6 +136,7 @@ macro(add_cuda_bc_library)
-I${CMAKE_CURRENT_SOURCE_DIR}/src
-I${devicertl_base_directory}/common/include
-I${devicertl_base_directory}
+ -I${devicertl_base_directory}/../include
${LIBOMPTARGET_LLVM_INCLUDE_DIRS_AMDGCN})
set(bc1_files)
diff --git a/openmp/libomptarget/deviceRTLs/common/debug.h b/openmp/libomptarget/deviceRTLs/common/debug.h
index 99c9b6cd58183..4ca1e5563fb36 100644
--- a/openmp/libomptarget/deviceRTLs/common/debug.h
+++ b/openmp/libomptarget/deviceRTLs/common/debug.h
@@ -28,7 +28,6 @@
#ifndef _OMPTARGET_NVPTX_DEBUG_H_
#define _OMPTARGET_NVPTX_DEBUG_H_
-#include "common/device_environment.h"
#include "target_interface.h"
////////////////////////////////////////////////////////////////////////////////
diff --git a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
index d420fad42b62d..aab16a31bb085 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
@@ -12,7 +12,6 @@
#pragma omp declare target
#include "common/allocator.h"
-#include "common/device_environment.h"
#include "common/omptarget.h"
////////////////////////////////////////////////////////////////////////////////
@@ -20,7 +19,7 @@
////////////////////////////////////////////////////////////////////////////////
PLUGIN_ACCESSIBLE
-omptarget_device_environmentTy omptarget_device_environment;
+DeviceEnvironmentTy omptarget_device_environment;
////////////////////////////////////////////////////////////////////////////////
// global data holding OpenMP state information
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
index 4ccadec86eeda..42cfbaf23beb7 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -173,6 +173,7 @@ set(bc_flags -S -x c++ -O1 -std=c++14
-I${devicertl_base_directory}
-I${devicertl_common_directory}/include
-I${devicertl_nvptx_directory}/src
+ -I${devicertl_base_directory}/../include
${LIBOMPTARGET_LLVM_INCLUDE_DIRS_NVPTX})
if(${LIBOMPTARGET_NVPTX_DEBUG})
diff --git a/openmp/libomptarget/deviceRTLs/target_interface.h b/openmp/libomptarget/deviceRTLs/target_interface.h
index c7ac0652e7557..8a1a2e4d6e2c3 100644
--- a/openmp/libomptarget/deviceRTLs/target_interface.h
+++ b/openmp/libomptarget/deviceRTLs/target_interface.h
@@ -13,6 +13,9 @@
#ifndef _OMPTARGET_TARGET_INTERFACE_H_
#define _OMPTARGET_TARGET_INTERFACE_H_
+#include <stdint.h>
+
+#include "DeviceEnvironment.h"
#include "target_impl.h"
// Calls to the NVPTX layer (assuming 1D layout)
@@ -70,4 +73,6 @@ EXTERN void __kmpc_impl_free(void *);
// Barrier until num_threads arrive.
EXTERN void __kmpc_impl_named_sync(uint32_t num_threads);
+extern DeviceEnvironmentTy omptarget_device_environment;
+
#endif // _OMPTARGET_TARGET_INTERFACE_H_
diff --git a/openmp/libomptarget/deviceRTLs/common/device_environment.h b/openmp/libomptarget/include/DeviceEnvironment.h
similarity index 71%
rename from openmp/libomptarget/deviceRTLs/common/device_environment.h
rename to openmp/libomptarget/include/DeviceEnvironment.h
index 5f94567a78853..231492c68f762 100644
--- a/openmp/libomptarget/deviceRTLs/common/device_environment.h
+++ b/openmp/libomptarget/include/DeviceEnvironment.h
@@ -1,4 +1,4 @@
-//===---- device_environment.h - OpenMP GPU device environment --- CUDA -*-===//
+//===---- device_environment.h - OpenMP GPU device environment ---- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -13,14 +13,13 @@
#ifndef _OMPTARGET_DEVICE_ENVIRONMENT_H_
#define _OMPTARGET_DEVICE_ENVIRONMENT_H_
-#include "target_impl.h"
+// deviceRTL uses <stdint> and DeviceRTL uses explicit definitions
-struct omptarget_device_environmentTy {
- int32_t debug_level;
- uint32_t num_devices;
- uint32_t device_num;
+struct DeviceEnvironmentTy {
+ uint32_t DebugKind;
+ uint32_t NumDevices;
+ uint32_t DeviceNum;
+ uint32_t DynamicMemSize;
};
-extern omptarget_device_environmentTy omptarget_device_environment;
-
#endif
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index c4e49c7a8020d..ef1efef833dd4 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -30,6 +30,7 @@
#include "internal.h"
#include "rt.h"
+#include "DeviceEnvironment.h"
#include "get_elf_mach_gfx_name.h"
#include "omptargetplugin.h"
#include "print_tracing.h"
@@ -802,14 +803,6 @@ class RTLDeviceInfoTy {
pthread_mutex_t SignalPoolT::mutex = PTHREAD_MUTEX_INITIALIZER;
-// TODO: May need to drop the trailing to fields until deviceRTL is updated
-struct omptarget_device_environmentTy {
- int32_t debug_level; // gets value of envvar LIBOMPTARGET_DEVICE_RTL_DEBUG
- // only useful for Debug build of deviceRTLs
- int32_t num_devices; // gets number of active offload devices
- int32_t device_num; // gets a value 0 to num_devices-1
-};
-
static RTLDeviceInfoTy DeviceInfo;
namespace {
@@ -1300,15 +1293,12 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id,
}
struct device_environment {
- // initialise an omptarget_device_environmentTy in the deviceRTL
+ // initialise an DeviceEnvironmentTy in the deviceRTL
// patches around
diff erences in the deviceRTL between trunk, aomp,
// rocmcc. Over time these
diff erences will tend to zero and this class
// simplified.
- // Symbol may be in .data or .bss, and may be missing fields:
- // - aomp has debug_level, num_devices, device_num
- // - trunk has debug_level
- // - under review in trunk is debug_level, device_num
- // - rocmcc matches aomp, patch to swap num_devices and device_num
+ // Symbol may be in .data or .bss, and may be missing fields, todo:
+ // review aomp/trunk/rocm and simplify the following
// The symbol may also have been deadstripped because the device side
// accessors were unused.
@@ -1318,7 +1308,7 @@ struct device_environment {
// gpu (trunk) and initialize after loading.
const char *sym() { return "omptarget_device_environment"; }
- omptarget_device_environmentTy host_device_env;
+ DeviceEnvironmentTy host_device_env;
symbol_info si;
bool valid = false;
@@ -1329,12 +1319,13 @@ struct device_environment {
__tgt_device_image *image, const size_t img_size)
: image(image), img_size(img_size) {
- host_device_env.num_devices = number_devices;
- host_device_env.device_num = device_id;
- host_device_env.debug_level = 0;
+ host_device_env.NumDevices = number_devices;
+ host_device_env.DeviceNum = device_id;
+ host_device_env.DebugKind = 0;
+ host_device_env.DynamicMemSize = 0;
#ifdef OMPTARGET_DEBUG
if (char *envStr = getenv("LIBOMPTARGET_DEVICE_RTL_DEBUG")) {
- host_device_env.debug_level = std::stoi(envStr);
+ host_device_env.DebugKind = std::stoi(envStr);
}
#endif
@@ -1374,7 +1365,7 @@ struct device_environment {
if (!in_image()) {
DP("Setting global device environment after load (%u bytes)\n",
si.size);
- int device_id = host_device_env.device_num;
+ int device_id = host_device_env.DeviceNum;
auto &SymbolInfo = DeviceInfo.SymbolInfoTable[device_id];
void *state_ptr;
uint32_t state_ptr_size;
@@ -1430,9 +1421,9 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
// This function loads the device image onto gpu[device_id] and does other
// per-image initialization work. Specifically:
//
- // - Initialize an omptarget_device_environmentTy instance embedded in the
+ // - Initialize an DeviceEnvironmentTy instance embedded in the
// image at the symbol "omptarget_device_environment"
- // Fields debug_level, device_num, num_devices. Used by the deviceRTL.
+ // Fields DebugKind, DeviceNum, NumDevices. Used by the deviceRTL.
//
// - Allocate a large array per-gpu (could be moved to init_device)
// - Read a uint64_t at symbol omptarget_nvptx_device_State_size
diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
index 6b370f1d01026..931ab12918863 100644
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -21,6 +21,7 @@
#include <vector>
#include "Debug.h"
+#include "DeviceEnvironment.h"
#include "omptargetplugin.h"
#define TARGET_NAME CUDA
@@ -87,16 +88,6 @@ struct KernelTy {
: Func(_Func), ExecutionMode(_ExecutionMode) {}
};
-/// Device environment data
-/// Manually sync with the deviceRTL side for now, move to a dedicated header
-/// file later.
-struct omptarget_device_environmentTy {
- int32_t debug_level;
- uint32_t num_devices;
- uint32_t device_num;
- uint64_t dynamic_shared_size;
-};
-
namespace {
bool checkResult(CUresult Err, const char *ErrMsg) {
if (Err == CUDA_SUCCESS)
@@ -897,13 +888,13 @@ class DeviceRTLTy {
// send device environment data to the device
{
// TODO: The device ID used here is not the real device ID used by OpenMP.
- omptarget_device_environmentTy DeviceEnv{
- 0, static_cast<uint32_t>(NumberOfDevices),
- static_cast<uint32_t>(DeviceId), DynamicMemorySize};
+ DeviceEnvironmentTy DeviceEnv{0, static_cast<uint32_t>(NumberOfDevices),
+ static_cast<uint32_t>(DeviceId),
+ static_cast<uint32_t>(DynamicMemorySize)};
#ifdef OMPTARGET_DEBUG
if (const char *EnvStr = getenv("LIBOMPTARGET_DEVICE_RTL_DEBUG"))
- DeviceEnv.debug_level = std::stoi(EnvStr);
+ DeviceEnv.DebugKind = std::stoi(EnvStr);
#endif
const char *DeviceEnvName = "omptarget_device_environment";
More information about the Openmp-commits
mailing list