[Openmp-commits] [openmp] [OpenMP][DeviceRTL] implemented omp_get_max_teams() and omp_set_num_teams and nteams-var ICV (PR #71259)

Khoi Nguyen via Openmp-commits openmp-commits at lists.llvm.org
Fri Nov 3 17:29:52 PDT 2023


https://github.com/khoing0810 created https://github.com/llvm/llvm-project/pull/71259

`omp_get_max_teams()` returns nteams-var ICV in the device it's being called in.
`omp_set_num_teams(int V)` set nteams-var ICV in the device with an integer value V. If `V` is less than 0, it will set the ICV to 0; if `V` is larger than max number of teams set in the device environment, the ICV will be set to that number.
One can set max number of teams in a device by setting the environment variable `OMP_NUM_TEAMS_DEV_<device>` where `<device>` is the device number/ID.

>From 1c69398a44ec0a3c2212bfd756b3dcf7017b16a7 Mon Sep 17 00:00:00 2001
From: "Khoi D. Nguyen" <nguyen155 at llnl.gov>
Date: Fri, 3 Nov 2023 17:24:11 -0700
Subject: [PATCH] implemented omp_get_max_teams() and omp_set_num_teams and
 nteams-var ICV

---
 .../DeviceRTL/include/Configuration.h         |  3 +
 .../DeviceRTL/include/Interface.h             |  4 ++
 openmp/libomptarget/DeviceRTL/include/State.h | 10 ++++
 .../DeviceRTL/src/Configuration.cpp           |  2 +
 openmp/libomptarget/DeviceRTL/src/State.cpp   |  8 +++
 openmp/libomptarget/include/Environment.h     |  1 +
 .../PluginInterface/PluginInterface.cpp       |  3 +-
 .../test/env/omp_get_max_teams_env_var.c      | 54 ++++++++++++++++++
 .../libomptarget/test/env/omp_set_num_teams.c | 57 +++++++++++++++++++
 9 files changed, 141 insertions(+), 1 deletion(-)
 create mode 100644 openmp/libomptarget/test/env/omp_get_max_teams_env_var.c
 create mode 100644 openmp/libomptarget/test/env/omp_set_num_teams.c

diff --git a/openmp/libomptarget/DeviceRTL/include/Configuration.h b/openmp/libomptarget/DeviceRTL/include/Configuration.h
index 45e5cead231f724..9529bc1ddf6198a 100644
--- a/openmp/libomptarget/DeviceRTL/include/Configuration.h
+++ b/openmp/libomptarget/DeviceRTL/include/Configuration.h
@@ -55,6 +55,9 @@ bool mayUseThreadStates();
 /// parallelism, or if it was explicitly disabled by the user.
 bool mayUseNestedParallelism();
 
+/// Return max number of teams in the device it's called on.
+uint32_t getMaxTeams(); 
+
 } // namespace config
 } // namespace ompx
 
diff --git a/openmp/libomptarget/DeviceRTL/include/Interface.h b/openmp/libomptarget/DeviceRTL/include/Interface.h
index 24de620759c4194..a403561e2bf44b3 100644
--- a/openmp/libomptarget/DeviceRTL/include/Interface.h
+++ b/openmp/libomptarget/DeviceRTL/include/Interface.h
@@ -133,6 +133,10 @@ int omp_get_num_teams(void);
 
 int omp_get_team_num();
 
+int omp_get_max_teams(void);
+
+void omp_set_num_teams(int V);
+
 int omp_get_initial_device(void);
 
 void *llvm_omp_target_dynamic_shared_alloc();
diff --git a/openmp/libomptarget/DeviceRTL/include/State.h b/openmp/libomptarget/DeviceRTL/include/State.h
index 1d73bdc4f5409cb..ae71fa6159830df 100644
--- a/openmp/libomptarget/DeviceRTL/include/State.h
+++ b/openmp/libomptarget/DeviceRTL/include/State.h
@@ -54,6 +54,7 @@ inline constexpr uint32_t SharedScratchpadSize = SHARED_SCRATCHPAD_SIZE;
 
 struct ICVStateTy {
   uint32_t NThreadsVar;
+  uint32_t NTeamsVar;
   uint32_t LevelVar;
   uint32_t ActiveLevelVar;
   uint32_t Padding0Val;
@@ -131,6 +132,7 @@ KernelLaunchEnvironmentTy &getKernelLaunchEnvironment();
 /// TODO
 enum ValueKind {
   VK_NThreads,
+  VK_NTeams,
   VK_Level,
   VK_ActiveLevel,
   VK_MaxActiveLevels,
@@ -190,6 +192,11 @@ lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
       return lookupImpl(&ICVStateTy::NThreadsVar, ForceTeamState);
     return lookupForModify32Impl(&ICVStateTy::NThreadsVar, Ident,
                                  ForceTeamState);
+  case state::VK_NTeams:
+    if (IsReadonly)
+      return lookupImpl(&ICVStateTy::NTeamsVar, ForceTeamState);
+    return lookupForModify32Impl(&ICVStateTy::NTeamsVar, Ident,
+                                 ForceTeamState);
   case state::VK_Level:
     if (IsReadonly)
       return lookupImpl(&ICVStateTy::LevelVar, ForceTeamState);
@@ -360,6 +367,9 @@ namespace icv {
 /// TODO
 inline state::Value<uint32_t, state::VK_NThreads> NThreads;
 
+/// TODO
+inline state::Value<uint32_t, state::VK_NTeams> NTeams;
+
 /// TODO
 inline state::Value<uint32_t, state::VK_Level> Level;
 
diff --git a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
index ab1608b1cfb0ae9..1f1a77c99380772 100644
--- a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
@@ -75,4 +75,6 @@ bool config::mayUseNestedParallelism() {
   return state::getKernelEnvironment().Configuration.MayUseNestedParallelism;
 }
 
+uint32_t config::getMaxTeams() { return __omp_rtl_device_environment.NumTeams; }
+
 #pragma omp end declare target
diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp
index f8a6d333df0d9ed..43f6a402b47d963 100644
--- a/openmp/libomptarget/DeviceRTL/src/State.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -199,6 +199,7 @@ void state::ICVStateTy::assertEqual(const ICVStateTy &Other) const {
 
 void state::TeamStateTy::init(bool IsSPMD) {
   ICVState.NThreadsVar = 0;
+  ICVState.NTeamsVar = config::getMaxTeams();
   ICVState.LevelVar = 0;
   ICVState.ActiveLevelVar = 0;
   ICVState.Padding0Val = 0;
@@ -424,6 +425,13 @@ int omp_get_num_teams(void) { return mapping::getNumberOfBlocksInKernel(); }
 
 int omp_get_team_num() { return mapping::getBlockIdInKernel(); }
 
+int omp_get_max_teams(void) { return icv::NTeams; }
+
+void omp_set_num_teams(int V) {
+  icv::NTeams = (V < 0) ? 0 :
+                (V >= config::getMaxTeams()) ? config::getMaxTeams() : V;
+}
+
 int omp_get_initial_device(void) { return -1; }
 }
 
diff --git a/openmp/libomptarget/include/Environment.h b/openmp/libomptarget/include/Environment.h
index bd493e8a0be78f1..5a24696b4aab4b1 100644
--- a/openmp/libomptarget/include/Environment.h
+++ b/openmp/libomptarget/include/Environment.h
@@ -35,6 +35,7 @@ enum class DeviceDebugKind : uint32_t {
 struct DeviceEnvironmentTy {
   uint32_t DeviceDebugKind;
   uint32_t NumDevices;
+  uint32_t NumTeams;
   uint32_t DeviceNum;
   uint32_t DynamicMemSize;
   uint64_t ClockFrequency;
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
index 106e7a68cd3ae3c..d7d9d0e7e642004 100644
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -628,7 +628,7 @@ uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
 GenericDeviceTy::GenericDeviceTy(int32_t DeviceId, int32_t NumDevices,
                                  const llvm::omp::GV &OMPGridValues)
     : MemoryManager(nullptr), OMP_TeamLimit("OMP_TEAM_LIMIT"),
-      OMP_NumTeams("OMP_NUM_TEAMS"),
+      OMP_NumTeams("OMP_NUM_TEAMS_DEV_" + std::to_string(DeviceId)),
       OMP_TeamsThreadLimit("OMP_TEAMS_THREAD_LIMIT"),
       OMPX_DebugKind("LIBOMPTARGET_DEVICE_RTL_DEBUG"),
       OMPX_SharedMemorySize("LIBOMPTARGET_SHARED_MEMORY_SIZE"),
@@ -854,6 +854,7 @@ Error GenericDeviceTy::setupDeviceEnvironment(GenericPluginTy &Plugin,
   DeviceEnvironmentTy DeviceEnvironment;
   DeviceEnvironment.DeviceDebugKind = OMPX_DebugKind;
   DeviceEnvironment.NumDevices = Plugin.getNumDevices();
+  DeviceEnvironment.NumTeams = (OMP_NumTeams >= 0) ? uint32_t(OMP_NumTeams) : 0;
   // TODO: The device ID used here is not the real device ID used by OpenMP.
   DeviceEnvironment.DeviceNum = DeviceId;
   DeviceEnvironment.DynamicMemSize = OMPX_SharedMemorySize;
diff --git a/openmp/libomptarget/test/env/omp_get_max_teams_env_var.c b/openmp/libomptarget/test/env/omp_get_max_teams_env_var.c
new file mode 100644
index 000000000000000..4910dbe4a75b298
--- /dev/null
+++ b/openmp/libomptarget/test/env/omp_get_max_teams_env_var.c
@@ -0,0 +1,54 @@
+// Test functionality of omp_get_max_teams() with setting
+// environment variable to 2 GPU devices. If there's only
+// one GPU device, remove the device 1 if statement.
+
+// RUN: %libomptarget-compile-generic -fopenmp-offload-mandatory
+// RUN: env OMP_NUM_TEAMS_DEV_0=5 OMP_NUM_TEAMS_DEV_1=-1 \
+// RUN: %libomptarget-run-generic 
+
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+
+#include <omp.h>
+#include <stdio.h>
+
+const int EXPECTED_NTEAMS_DEV_0 = 5;
+const int EXPECTED_NTEAMS_DEV_1 = 0;
+
+int omp_get_max_teams(void);
+
+int test_nteams_var_env(void) {
+  int errors = 0;
+  int device_id;
+  int n_devs;
+  int curr_nteams = -1;
+#pragma omp target map(tofrom : n_devs)
+  { n_devs = omp_get_num_devices(); }
+
+  for (int i = 0; i < n_devs; i++) {
+#pragma omp target device(i) map(tofrom : curr_nteams, device_id, errors)
+    {
+      device_id = omp_get_device_num();
+      errors = errors + (device_id != i);
+      curr_nteams = omp_get_max_teams();
+      if (device_id == 0) { errors = errors + (curr_nteams != EXPECTED_NTEAMS_DEV_0); } // device 0
+      if (device_id == 1) { errors = errors + (curr_nteams != EXPECTED_NTEAMS_DEV_1); } // device 1
+    }
+    printf("device: %d nteams: %d\n", device_id, curr_nteams);
+  }
+  return errors;
+}
+
+int main() {
+  int errors = 0;
+  errors = test_nteams_var_env();
+  if (errors)
+    printf("FAIL\n");
+  else
+    printf("PASS\n");
+  return errors;
+}
+
+// CHECK: PASS
\ No newline at end of file
diff --git a/openmp/libomptarget/test/env/omp_set_num_teams.c b/openmp/libomptarget/test/env/omp_set_num_teams.c
new file mode 100644
index 000000000000000..9b798518bfc5e91
--- /dev/null
+++ b/openmp/libomptarget/test/env/omp_set_num_teams.c
@@ -0,0 +1,57 @@
+// Test functionality of omp_set_num_teams() with setting
+// environment variable as an upper bound. Test for negative
+// value and value that is larger than the upper bound.
+
+// RUN: %libomptarget-compile-generic -fopenmp-offload-mandatory
+// RUN: env OMP_NUM_TEAMS_DEV_0=3 LIBOMPTARGET_INFO=16\
+// RUN: %libomptarget-run-generic
+
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+
+#include <omp.h>
+#include <stdio.h>
+
+const int EXPECTED_NTEAMS = 3;
+
+void omp_set_num_teams(int V);
+int omp_get_max_teams(void);
+
+int test_set_over_max(void) {
+  int errors = 0;
+  int n_devs;
+  int curr_nteams = -1;
+
+#pragma omp target map(tofrom : n_devs)
+  { n_devs = omp_get_num_devices(); }
+
+#pragma omp target device(0) map(tofrom : curr_nteams, errors)
+  {
+    omp_set_num_teams(3 + 1);
+    curr_nteams = omp_get_max_teams();
+    errors = errors + (curr_nteams != 3);
+
+    omp_set_num_teams(-1);
+    curr_nteams = omp_get_max_teams();
+    errors = errors + (curr_nteams != 0);
+
+    omp_set_num_teams(3);
+    curr_nteams = omp_get_max_teams();
+    errors = errors + (curr_nteams != 3);
+  }
+  return errors;
+}
+
+int main() {
+  int errors = 0;
+  errors = test_set_over_max();
+  if (errors)
+    printf("FAIL\n");
+  else
+    printf("PASS\n");
+  return errors;
+}
+
+// CHECK: PASS
\ No newline at end of file



More information about the Openmp-commits mailing list