[Openmp-commits] [openmp] ddfb074 - [libomptarget][nfc] Group environment variables, drop accesses to DeviceInfo global

Jon Chesterfield via Openmp-commits openmp-commits at lists.llvm.org
Tue Jul 6 09:06:53 PDT 2021


Author: Jon Chesterfield
Date: 2021-07-06T17:06:38+01:00
New Revision: ddfb074a80a24f94290d895b74d2c80626d953ba

URL: https://github.com/llvm/llvm-project/commit/ddfb074a80a24f94290d895b74d2c80626d953ba
DIFF: https://github.com/llvm/llvm-project/commit/ddfb074a80a24f94290d895b74d2c80626d953ba.diff

LOG: [libomptarget][nfc] Group environment variables, drop accesses to DeviceInfo global

[libomptarget][nfc] Group environment variables, drop accesses to DeviceInfo global

Folds some duplicated logic into a helper function and passes the new environment
struct into getLaunchVals, which no longer reads the DeviceInfo global.

Implemented on top of D105237

Reviewed By: dhruvachak

Differential Revision: https://reviews.llvm.org/D105239

Added: 
    

Modified: 
    openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index 8f40778083cf1..2c8d860a63ca3 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -428,6 +428,13 @@ FindKernargPool(const std::vector<hsa_agent_t> &HSAAgents) {
 } // namespace
 } // namespace core
 
+struct EnvironmentVariables {
+  int NumTeams;
+  int TeamLimit;
+  int TeamThreadLimit;
+  int MaxTeamsDefault;
+};
+
 /// Class containing all the device information
 class RTLDeviceInfoTy {
   std::vector<std::list<FuncOrGblEntryTy>> FuncGblEntries;
@@ -458,10 +465,7 @@ class RTLDeviceInfoTy {
   std::vector<int> NumThreads;
 
   // OpenMP Environment properties
-  int EnvNumTeams;
-  int EnvTeamLimit;
-  int EnvTeamThreadLimit;
-  int EnvMaxTeamsDefault;
+  EnvironmentVariables Env;
 
   // OpenMP Requires Flags
   int64_t RequiresFlags;
@@ -663,6 +667,16 @@ class RTLDeviceInfoTy {
     return HostFineGrainedMemoryPool;
   }
 
+  static int readEnvElseMinusOne(const char *Env) {
+    const char *envStr = getenv(Env);
+    int res = -1;
+    if (envStr) {
+      res = std::stoi(envStr);
+      DP("Parsed %s=%d\n", Env, res);
+    }
+    return res;
+  }
+
   RTLDeviceInfoTy() {
     // LIBOMPTARGET_KERNEL_TRACE provides a kernel launch trace to stderr
     // anytime. You do not need a debug library build.
@@ -768,37 +782,10 @@ class RTLDeviceInfoTy {
     }
 
     // Get environment variables regarding teams
-    char *envStr = getenv("OMP_TEAM_LIMIT");
-    if (envStr) {
-      // OMP_TEAM_LIMIT has been set
-      EnvTeamLimit = std::stoi(envStr);
-      DP("Parsed OMP_TEAM_LIMIT=%d\n", EnvTeamLimit);
-    } else {
-      EnvTeamLimit = -1;
-    }
-    envStr = getenv("OMP_NUM_TEAMS");
-    if (envStr) {
-      // OMP_NUM_TEAMS has been set
-      EnvNumTeams = std::stoi(envStr);
-      DP("Parsed OMP_NUM_TEAMS=%d\n", EnvNumTeams);
-    } else {
-      EnvNumTeams = -1;
-    }
-    // Get environment variables regarding expMaxTeams
-    envStr = getenv("OMP_MAX_TEAMS_DEFAULT");
-    if (envStr) {
-      EnvMaxTeamsDefault = std::stoi(envStr);
-      DP("Parsed OMP_MAX_TEAMS_DEFAULT=%d\n", EnvMaxTeamsDefault);
-    } else {
-      EnvMaxTeamsDefault = -1;
-    }
-    envStr = getenv("OMP_TEAMS_THREAD_LIMIT");
-    if (envStr) {
-      EnvTeamThreadLimit = std::stoi(envStr);
-      DP("Parsed OMP_TEAMS_THREAD_LIMIT=%d\n", EnvTeamThreadLimit);
-    } else {
-      EnvTeamThreadLimit = -1;
-    }
+    Env.TeamLimit = readEnvElseMinusOne("OMP_TEAM_LIMIT");
+    Env.NumTeams = readEnvElseMinusOne("OMP_NUM_TEAMS");
+    Env.MaxTeamsDefault = readEnvElseMinusOne("OMP_MAX_TEAMS_DEFAULT");
+    Env.TeamThreadLimit = readEnvElseMinusOne("OMP_TEAMS_THREAD_LIMIT");
 
     // Default state.
     RequiresFlags = OMP_REQ_UNDEFINED;
@@ -1073,18 +1060,18 @@ int32_t __tgt_rtl_init_device(int device_id) {
 
   // Adjust teams to the env variables
 
-  if (DeviceInfo.EnvTeamLimit > 0 &&
+  if (DeviceInfo.Env.TeamLimit > 0 &&
       (enforce_upper_bound(&DeviceInfo.GroupsPerDevice[device_id],
-                           DeviceInfo.EnvTeamLimit))) {
+                           DeviceInfo.Env.TeamLimit))) {
     DP("Capping max groups per device to OMP_TEAM_LIMIT=%d\n",
-       DeviceInfo.EnvTeamLimit);
+       DeviceInfo.Env.TeamLimit);
   }
 
   // Set default number of teams
-  if (DeviceInfo.EnvNumTeams > 0) {
-    DeviceInfo.NumTeams[device_id] = DeviceInfo.EnvNumTeams;
+  if (DeviceInfo.Env.NumTeams > 0) {
+    DeviceInfo.NumTeams[device_id] = DeviceInfo.Env.NumTeams;
     DP("Default number of teams set according to environment %d\n",
-       DeviceInfo.EnvNumTeams);
+       DeviceInfo.Env.NumTeams);
   } else {
     char *TeamsPerCUEnvStr = getenv("OMP_TARGET_TEAMS_PER_PROC");
     int TeamsPerCU = DefaultTeamsPerCU;
@@ -1105,11 +1092,11 @@ int32_t __tgt_rtl_init_device(int device_id) {
   }
 
   // Adjust threads to the env variables
-  if (DeviceInfo.EnvTeamThreadLimit > 0 &&
+  if (DeviceInfo.Env.TeamThreadLimit > 0 &&
       (enforce_upper_bound(&DeviceInfo.NumThreads[device_id],
-                           DeviceInfo.EnvTeamThreadLimit))) {
+                           DeviceInfo.Env.TeamThreadLimit))) {
     DP("Capping max number of threads to OMP_TEAMS_THREAD_LIMIT=%d\n",
-       DeviceInfo.EnvTeamThreadLimit);
+       DeviceInfo.Env.TeamThreadLimit);
   }
 
   // Set default number of threads
@@ -1880,28 +1867,22 @@ int32_t __tgt_rtl_data_delete(int device_id, void *tgt_ptr) {
   return OFFLOAD_SUCCESS;
 }
 
-// Determine launch values for threadsPerGroup and num_groups.
-// Outputs: treadsPerGroup, num_groups
-// Inputs: Max_Teams, Max_WG_Size, Warp_Size, ExecutionMode,
-//         EnvTeamLimit, EnvNumTeams, num_teams, thread_limit,
-//         loop_tripcount.
+// Determine launch values for kernel.
 struct launchVals {
   int WorkgroupSize;
   int GridSize;
 };
-
-launchVals getLaunchVals(int ConstWGSize, int ExecutionMode, int EnvTeamLimit,
-                         int EnvNumTeams, int num_teams, int thread_limit,
+launchVals getLaunchVals(EnvironmentVariables Env, int ConstWGSize,
+                         int ExecutionMode, int num_teams, int thread_limit,
                          uint64_t loop_tripcount, int DeviceNumTeams) {
 
   int threadsPerGroup = RTLDeviceInfoTy::Default_WG_Size;
   int num_groups = 0;
 
-  int Max_Teams = DeviceInfo.EnvMaxTeamsDefault > 0
-                      ? DeviceInfo.EnvMaxTeamsDefault
-                      : DeviceNumTeams;
-  if (Max_Teams > DeviceInfo.HardTeamLimit)
-    Max_Teams = DeviceInfo.HardTeamLimit;
+  int Max_Teams =
+      Env.MaxTeamsDefault > 0 ? Env.MaxTeamsDefault : DeviceNumTeams;
+  if (Max_Teams > RTLDeviceInfoTy::HardTeamLimit)
+    Max_Teams = RTLDeviceInfoTy::HardTeamLimit;
 
   if (print_kernel_trace & STARTUP_DETAILS) {
     fprintf(stderr, "RTLDeviceInfoTy::Max_Teams: %d\n",
@@ -1941,10 +1922,8 @@ launchVals getLaunchVals(int ConstWGSize, int ExecutionMode, int EnvTeamLimit,
   DP("Preparing %d threads\n", threadsPerGroup);
 
   // Set default num_groups (teams)
-  if (DeviceInfo.EnvTeamLimit > 0)
-    num_groups = (Max_Teams < DeviceInfo.EnvTeamLimit)
-                     ? Max_Teams
-                     : DeviceInfo.EnvTeamLimit;
+  if (Env.TeamLimit > 0)
+    num_groups = (Max_Teams < Env.TeamLimit) ? Max_Teams : Env.TeamLimit;
   else
     num_groups = Max_Teams;
   DP("Set default num of groups %d\n", num_groups);
@@ -1971,19 +1950,16 @@ launchVals getLaunchVals(int ConstWGSize, int ExecutionMode, int EnvTeamLimit,
   }
   if (print_kernel_trace & STARTUP_DETAILS) {
     fprintf(stderr, "num_groups: %d\n", num_groups);
-    fprintf(stderr, "DeviceInfo.EnvNumTeams %d\n", DeviceInfo.EnvNumTeams);
-    fprintf(stderr, "DeviceInfo.EnvTeamLimit %d\n", DeviceInfo.EnvTeamLimit);
-  }
-
-  if (DeviceInfo.EnvNumTeams > 0) {
-    num_groups = (DeviceInfo.EnvNumTeams < num_groups) ? DeviceInfo.EnvNumTeams
-                                                       : num_groups;
-    DP("Modifying teams based on EnvNumTeams %d\n", DeviceInfo.EnvNumTeams);
-  } else if (DeviceInfo.EnvTeamLimit > 0) {
-    num_groups = (DeviceInfo.EnvTeamLimit < num_groups)
-                     ? DeviceInfo.EnvTeamLimit
-                     : num_groups;
-    DP("Modifying teams based on EnvTeamLimit%d\n", DeviceInfo.EnvTeamLimit);
+    fprintf(stderr, "Env.NumTeams %d\n", Env.NumTeams);
+    fprintf(stderr, "Env.TeamLimit %d\n", Env.TeamLimit);
+  }
+
+  if (Env.NumTeams > 0) {
+    num_groups = (Env.NumTeams < num_groups) ? Env.NumTeams : num_groups;
+    DP("Modifying teams based on Env.NumTeams %d\n", Env.NumTeams);
+  } else if (Env.TeamLimit > 0) {
+    num_groups = (Env.TeamLimit < num_groups) ? Env.TeamLimit : num_groups;
+    DP("Modifying teams based on Env.TeamLimit%d\n", Env.TeamLimit);
   } else {
     if (num_teams <= 0) {
       if (loop_tripcount > 0) {
@@ -2018,9 +1994,8 @@ launchVals getLaunchVals(int ConstWGSize, int ExecutionMode, int EnvTeamLimit,
   if (num_teams > 0) {
     num_groups = num_teams;
     // Cap num_groups to EnvMaxTeamsDefault if set.
-    if (DeviceInfo.EnvMaxTeamsDefault > 0 &&
-        num_groups > DeviceInfo.EnvMaxTeamsDefault)
-      num_groups = DeviceInfo.EnvMaxTeamsDefault;
+    if (Env.MaxTeamsDefault > 0 && num_groups > Env.MaxTeamsDefault)
+      num_groups = Env.MaxTeamsDefault;
   }
   if (print_kernel_trace & STARTUP_DETAILS) {
     fprintf(stderr, "threadsPerGroup: %d\n", threadsPerGroup);
@@ -2111,13 +2086,12 @@ int32_t __tgt_rtl_run_target_team_region_locked(
   /*
    * Set limit based on ThreadsPerGroup and GroupsPerDevice
    */
-  launchVals LV =
-      getLaunchVals(KernelInfo->ConstWGSize, KernelInfo->ExecutionMode,
-                    DeviceInfo.EnvTeamLimit, DeviceInfo.EnvNumTeams,
-                    num_teams,      // From run_region arg
-                    thread_limit,   // From run_region arg
-                    loop_tripcount, // From run_region arg
-                    DeviceInfo.NumTeams[KernelInfo->device_id]);
+  launchVals LV = getLaunchVals(DeviceInfo.Env, KernelInfo->ConstWGSize,
+                                KernelInfo->ExecutionMode,
+                                num_teams,      // From run_region arg
+                                thread_limit,   // From run_region arg
+                                loop_tripcount, // From run_region arg
+                                DeviceInfo.NumTeams[KernelInfo->device_id]);
   const int GridSize = LV.GridSize;
   const int WorkgroupSize = LV.WorkgroupSize;
 


        


More information about the Openmp-commits mailing list