[Openmp-commits] [openmp] 0c554a4 - [libomptarget] Move device environment to shared header, remove divergence

Jon Chesterfield via Openmp-commits openmp-commits at lists.llvm.org
Thu Oct 7 04:04:08 PDT 2021


Author: Jon Chesterfield
Date: 2021-10-07T12:03:48+01:00
New Revision: 0c554a4769f2e21233b687c7427c1a47f7bd375e

URL: https://github.com/llvm/llvm-project/commit/0c554a4769f2e21233b687c7427c1a47f7bd375e
DIFF: https://github.com/llvm/llvm-project/commit/0c554a4769f2e21233b687c7427c1a47f7bd375e.diff

LOG: [libomptarget] Move device environment to shared header, remove divergence

Follow on to D110006, related to D110957

Where implementations have diverged this resolves to match the new DeviceRTL

- replaces definitions of this struct in deviceRTL and plugins with include
- changes the dynamic_shared_size field from D110006 to 32 bits
- handles stdint being unavailable in DeviceRTL
- adds a zero initializer for the field to amdgpu
- moves the extern declaration for deviceRTL to target_interface
  (omptarget.h is more natural, but doesn't work due to include order
  with debug.h)
- Renames the fields everywhere to match the LLVM format used in DeviceRTL
- Makes debug_level uint32_t everywhere (previously sometimes int32_t)

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D111069

Added: 
    openmp/libomptarget/include/DeviceEnvironment.h

Modified: 
    openmp/libomptarget/DeviceRTL/CMakeLists.txt
    openmp/libomptarget/DeviceRTL/src/Configuration.cpp
    openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
    openmp/libomptarget/deviceRTLs/common/debug.h
    openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
    openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
    openmp/libomptarget/deviceRTLs/target_interface.h
    openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
    openmp/libomptarget/plugins/cuda/src/rtl.cpp

Removed: 
    openmp/libomptarget/deviceRTLs/common/device_environment.h


################################################################################
diff  --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
index bf2b6b1f8ac4d..da6f34a5ae4ff 100644
--- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt
+++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
@@ -174,6 +174,7 @@ set(bc_flags -S -x c++ -std=c++17
              -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device
              -Xclang -target-feature -Xclang +ptx61
              -I${include_directory}
+             -I${devicertl_base_directory}/../include
              ${LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL}
 )
 

diff  --git a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
index b725888be2da7..2b6f20fb1732c 100644
--- a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
@@ -12,18 +12,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "Configuration.h"
+#include "DeviceEnvironment.h"
 #include "State.h"
 #include "Types.h"
 
 using namespace _OMP;
 
-struct DeviceEnvironmentTy {
-  uint32_t DebugKind;
-  uint32_t NumDevices;
-  uint32_t DeviceNum;
-  uint64_t DynamicMemSize;
-};
-
 #pragma omp declare target
 
 extern uint32_t __omp_rtl_debug_kind;

diff  --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
index 6e92634f5420b..3f4c02671aebc 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
@@ -92,7 +92,6 @@ set(h_files
   ${CMAKE_CURRENT_SOURCE_DIR}/src/amdgcn_interface.h
   ${CMAKE_CURRENT_SOURCE_DIR}/src/target_impl.h
   ${devicertl_base_directory}/common/debug.h
-  ${devicertl_base_directory}/common/device_environment.h
   ${devicertl_base_directory}/common/omptarget.h
   ${devicertl_base_directory}/common/omptargeti.h
   ${devicertl_base_directory}/common/state-queue.h
@@ -137,6 +136,7 @@ macro(add_cuda_bc_library)
     -I${CMAKE_CURRENT_SOURCE_DIR}/src
     -I${devicertl_base_directory}/common/include
     -I${devicertl_base_directory}
+    -I${devicertl_base_directory}/../include
     ${LIBOMPTARGET_LLVM_INCLUDE_DIRS_AMDGCN})
 
   set(bc1_files)

diff  --git a/openmp/libomptarget/deviceRTLs/common/debug.h b/openmp/libomptarget/deviceRTLs/common/debug.h
index 99c9b6cd58183..4ca1e5563fb36 100644
--- a/openmp/libomptarget/deviceRTLs/common/debug.h
+++ b/openmp/libomptarget/deviceRTLs/common/debug.h
@@ -28,7 +28,6 @@
 #ifndef _OMPTARGET_NVPTX_DEBUG_H_
 #define _OMPTARGET_NVPTX_DEBUG_H_
 
-#include "common/device_environment.h"
 #include "target_interface.h"
 
 ////////////////////////////////////////////////////////////////////////////////

diff  --git a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
index d420fad42b62d..aab16a31bb085 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
@@ -12,7 +12,6 @@
 #pragma omp declare target
 
 #include "common/allocator.h"
-#include "common/device_environment.h"
 #include "common/omptarget.h"
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -20,7 +19,7 @@
 ////////////////////////////////////////////////////////////////////////////////
 
 PLUGIN_ACCESSIBLE
-omptarget_device_environmentTy omptarget_device_environment;
+DeviceEnvironmentTy omptarget_device_environment;
 
 ////////////////////////////////////////////////////////////////////////////////
 // global data holding OpenMP state information

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
index 4ccadec86eeda..42cfbaf23beb7 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -173,6 +173,7 @@ set(bc_flags -S -x c++ -O1 -std=c++14
              -I${devicertl_base_directory}
              -I${devicertl_common_directory}/include
              -I${devicertl_nvptx_directory}/src
+             -I${devicertl_base_directory}/../include
              ${LIBOMPTARGET_LLVM_INCLUDE_DIRS_NVPTX})
 
 if(${LIBOMPTARGET_NVPTX_DEBUG})

diff  --git a/openmp/libomptarget/deviceRTLs/target_interface.h b/openmp/libomptarget/deviceRTLs/target_interface.h
index c7ac0652e7557..8a1a2e4d6e2c3 100644
--- a/openmp/libomptarget/deviceRTLs/target_interface.h
+++ b/openmp/libomptarget/deviceRTLs/target_interface.h
@@ -13,6 +13,9 @@
 #ifndef _OMPTARGET_TARGET_INTERFACE_H_
 #define _OMPTARGET_TARGET_INTERFACE_H_
 
+#include <stdint.h>
+
+#include "DeviceEnvironment.h"
 #include "target_impl.h"
 
 // Calls to the NVPTX layer (assuming 1D layout)
@@ -70,4 +73,6 @@ EXTERN void __kmpc_impl_free(void *);
 // Barrier until num_threads arrive.
 EXTERN void __kmpc_impl_named_sync(uint32_t num_threads);
 
+extern DeviceEnvironmentTy omptarget_device_environment;
+
 #endif // _OMPTARGET_TARGET_INTERFACE_H_

diff  --git a/openmp/libomptarget/deviceRTLs/common/device_environment.h b/openmp/libomptarget/include/DeviceEnvironment.h
similarity index 71%
rename from openmp/libomptarget/deviceRTLs/common/device_environment.h
rename to openmp/libomptarget/include/DeviceEnvironment.h
index 5f94567a78853..231492c68f762 100644
--- a/openmp/libomptarget/deviceRTLs/common/device_environment.h
+++ b/openmp/libomptarget/include/DeviceEnvironment.h
@@ -1,4 +1,4 @@
-//===---- device_environment.h - OpenMP GPU device environment --- CUDA -*-===//
+//===---- device_environment.h - OpenMP GPU device environment ---- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -13,14 +13,13 @@
 #ifndef _OMPTARGET_DEVICE_ENVIRONMENT_H_
 #define _OMPTARGET_DEVICE_ENVIRONMENT_H_
 
-#include "target_impl.h"
+// deviceRTL uses <stdint> and DeviceRTL uses explicit definitions
 
-struct omptarget_device_environmentTy {
-  int32_t debug_level;
-  uint32_t num_devices;
-  uint32_t device_num;
+struct DeviceEnvironmentTy {
+  uint32_t DebugKind;
+  uint32_t NumDevices;
+  uint32_t DeviceNum;
+  uint32_t DynamicMemSize;
 };
 
-extern omptarget_device_environmentTy omptarget_device_environment;
-
 #endif

diff  --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index c4e49c7a8020d..ef1efef833dd4 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -30,6 +30,7 @@
 #include "internal.h"
 #include "rt.h"
 
+#include "DeviceEnvironment.h"
 #include "get_elf_mach_gfx_name.h"
 #include "omptargetplugin.h"
 #include "print_tracing.h"
@@ -802,14 +803,6 @@ class RTLDeviceInfoTy {
 
 pthread_mutex_t SignalPoolT::mutex = PTHREAD_MUTEX_INITIALIZER;
 
-// TODO: May need to drop the trailing to fields until deviceRTL is updated
-struct omptarget_device_environmentTy {
-  int32_t debug_level; // gets value of envvar LIBOMPTARGET_DEVICE_RTL_DEBUG
-                       // only useful for Debug build of deviceRTLs
-  int32_t num_devices; // gets number of active offload devices
-  int32_t device_num;  // gets a value 0 to num_devices-1
-};
-
 static RTLDeviceInfoTy DeviceInfo;
 
 namespace {
@@ -1300,15 +1293,12 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id,
 }
 
 struct device_environment {
-  // initialise an omptarget_device_environmentTy in the deviceRTL
+  // initialise an DeviceEnvironmentTy in the deviceRTL
   // patches around 
diff erences in the deviceRTL between trunk, aomp,
   // rocmcc. Over time these 
diff erences will tend to zero and this class
   // simplified.
-  // Symbol may be in .data or .bss, and may be missing fields:
-  //  - aomp has debug_level, num_devices, device_num
-  //  - trunk has debug_level
-  //  - under review in trunk is debug_level, device_num
-  //  - rocmcc matches aomp, patch to swap num_devices and device_num
+  // Symbol may be in .data or .bss, and may be missing fields, todo:
+  // review aomp/trunk/rocm and simplify the following
 
   // The symbol may also have been deadstripped because the device side
   // accessors were unused.
@@ -1318,7 +1308,7 @@ struct device_environment {
   // gpu (trunk) and initialize after loading.
   const char *sym() { return "omptarget_device_environment"; }
 
-  omptarget_device_environmentTy host_device_env;
+  DeviceEnvironmentTy host_device_env;
   symbol_info si;
   bool valid = false;
 
@@ -1329,12 +1319,13 @@ struct device_environment {
                      __tgt_device_image *image, const size_t img_size)
       : image(image), img_size(img_size) {
 
-    host_device_env.num_devices = number_devices;
-    host_device_env.device_num = device_id;
-    host_device_env.debug_level = 0;
+    host_device_env.NumDevices = number_devices;
+    host_device_env.DeviceNum = device_id;
+    host_device_env.DebugKind = 0;
+    host_device_env.DynamicMemSize = 0;
 #ifdef OMPTARGET_DEBUG
     if (char *envStr = getenv("LIBOMPTARGET_DEVICE_RTL_DEBUG")) {
-      host_device_env.debug_level = std::stoi(envStr);
+      host_device_env.DebugKind = std::stoi(envStr);
     }
 #endif
 
@@ -1374,7 +1365,7 @@ struct device_environment {
       if (!in_image()) {
         DP("Setting global device environment after load (%u bytes)\n",
            si.size);
-        int device_id = host_device_env.device_num;
+        int device_id = host_device_env.DeviceNum;
         auto &SymbolInfo = DeviceInfo.SymbolInfoTable[device_id];
         void *state_ptr;
         uint32_t state_ptr_size;
@@ -1430,9 +1421,9 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
   // This function loads the device image onto gpu[device_id] and does other
   // per-image initialization work. Specifically:
   //
-  // - Initialize an omptarget_device_environmentTy instance embedded in the
+  // - Initialize an DeviceEnvironmentTy instance embedded in the
   //   image at the symbol "omptarget_device_environment"
-  //   Fields debug_level, device_num, num_devices. Used by the deviceRTL.
+  //   Fields DebugKind, DeviceNum, NumDevices. Used by the deviceRTL.
   //
   // - Allocate a large array per-gpu (could be moved to init_device)
   //   - Read a uint64_t at symbol omptarget_nvptx_device_State_size

diff  --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
index 6b370f1d01026..931ab12918863 100644
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -21,6 +21,7 @@
 #include <vector>
 
 #include "Debug.h"
+#include "DeviceEnvironment.h"
 #include "omptargetplugin.h"
 
 #define TARGET_NAME CUDA
@@ -87,16 +88,6 @@ struct KernelTy {
       : Func(_Func), ExecutionMode(_ExecutionMode) {}
 };
 
-/// Device environment data
-/// Manually sync with the deviceRTL side for now, move to a dedicated header
-/// file later.
-struct omptarget_device_environmentTy {
-  int32_t debug_level;
-  uint32_t num_devices;
-  uint32_t device_num;
-  uint64_t dynamic_shared_size;
-};
-
 namespace {
 bool checkResult(CUresult Err, const char *ErrMsg) {
   if (Err == CUDA_SUCCESS)
@@ -897,13 +888,13 @@ class DeviceRTLTy {
     // send device environment data to the device
     {
       // TODO: The device ID used here is not the real device ID used by OpenMP.
-      omptarget_device_environmentTy DeviceEnv{
-          0, static_cast<uint32_t>(NumberOfDevices),
-          static_cast<uint32_t>(DeviceId), DynamicMemorySize};
+      DeviceEnvironmentTy DeviceEnv{0, static_cast<uint32_t>(NumberOfDevices),
+                                    static_cast<uint32_t>(DeviceId),
+                                    static_cast<uint32_t>(DynamicMemorySize)};
 
 #ifdef OMPTARGET_DEBUG
       if (const char *EnvStr = getenv("LIBOMPTARGET_DEVICE_RTL_DEBUG"))
-        DeviceEnv.debug_level = std::stoi(EnvStr);
+        DeviceEnv.DebugKind = std::stoi(EnvStr);
 #endif
 
       const char *DeviceEnvName = "omptarget_device_environment";


        


More information about the Openmp-commits mailing list