[Openmp-commits] [openmp] ddfb074 - [libomptarget][nfc] Group environment variables, drop accesses to DeviceInfo global
Jon Chesterfield via Openmp-commits
openmp-commits at lists.llvm.org
Tue Jul 6 09:06:53 PDT 2021
Author: Jon Chesterfield
Date: 2021-07-06T17:06:38+01:00
New Revision: ddfb074a80a24f94290d895b74d2c80626d953ba
URL: https://github.com/llvm/llvm-project/commit/ddfb074a80a24f94290d895b74d2c80626d953ba
DIFF: https://github.com/llvm/llvm-project/commit/ddfb074a80a24f94290d895b74d2c80626d953ba.diff
LOG: [libomptarget][nfc] Group environment variables, drop accesses to DeviceInfo global
[libomptarget][nfc] Group environment variables, drop accesses to DeviceInfo global
Folds some duplicated logic into a helper function and passes the new environment
struct into getLaunchVals, which no longer reads the DeviceInfo global.
Implemented on top of D105237
Reviewed By: dhruvachak
Differential Revision: https://reviews.llvm.org/D105239
Added:
Modified:
openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index 8f40778083cf1..2c8d860a63ca3 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -428,6 +428,13 @@ FindKernargPool(const std::vector<hsa_agent_t> &HSAAgents) {
} // namespace
} // namespace core
+struct EnvironmentVariables {
+ int NumTeams;
+ int TeamLimit;
+ int TeamThreadLimit;
+ int MaxTeamsDefault;
+};
+
/// Class containing all the device information
class RTLDeviceInfoTy {
std::vector<std::list<FuncOrGblEntryTy>> FuncGblEntries;
@@ -458,10 +465,7 @@ class RTLDeviceInfoTy {
std::vector<int> NumThreads;
// OpenMP Environment properties
- int EnvNumTeams;
- int EnvTeamLimit;
- int EnvTeamThreadLimit;
- int EnvMaxTeamsDefault;
+ EnvironmentVariables Env;
// OpenMP Requires Flags
int64_t RequiresFlags;
@@ -663,6 +667,16 @@ class RTLDeviceInfoTy {
return HostFineGrainedMemoryPool;
}
+ static int readEnvElseMinusOne(const char *Env) {
+ const char *envStr = getenv(Env);
+ int res = -1;
+ if (envStr) {
+ res = std::stoi(envStr);
+ DP("Parsed %s=%d\n", Env, res);
+ }
+ return res;
+ }
+
RTLDeviceInfoTy() {
// LIBOMPTARGET_KERNEL_TRACE provides a kernel launch trace to stderr
// anytime. You do not need a debug library build.
@@ -768,37 +782,10 @@ class RTLDeviceInfoTy {
}
// Get environment variables regarding teams
- char *envStr = getenv("OMP_TEAM_LIMIT");
- if (envStr) {
- // OMP_TEAM_LIMIT has been set
- EnvTeamLimit = std::stoi(envStr);
- DP("Parsed OMP_TEAM_LIMIT=%d\n", EnvTeamLimit);
- } else {
- EnvTeamLimit = -1;
- }
- envStr = getenv("OMP_NUM_TEAMS");
- if (envStr) {
- // OMP_NUM_TEAMS has been set
- EnvNumTeams = std::stoi(envStr);
- DP("Parsed OMP_NUM_TEAMS=%d\n", EnvNumTeams);
- } else {
- EnvNumTeams = -1;
- }
- // Get environment variables regarding expMaxTeams
- envStr = getenv("OMP_MAX_TEAMS_DEFAULT");
- if (envStr) {
- EnvMaxTeamsDefault = std::stoi(envStr);
- DP("Parsed OMP_MAX_TEAMS_DEFAULT=%d\n", EnvMaxTeamsDefault);
- } else {
- EnvMaxTeamsDefault = -1;
- }
- envStr = getenv("OMP_TEAMS_THREAD_LIMIT");
- if (envStr) {
- EnvTeamThreadLimit = std::stoi(envStr);
- DP("Parsed OMP_TEAMS_THREAD_LIMIT=%d\n", EnvTeamThreadLimit);
- } else {
- EnvTeamThreadLimit = -1;
- }
+ Env.TeamLimit = readEnvElseMinusOne("OMP_TEAM_LIMIT");
+ Env.NumTeams = readEnvElseMinusOne("OMP_NUM_TEAMS");
+ Env.MaxTeamsDefault = readEnvElseMinusOne("OMP_MAX_TEAMS_DEFAULT");
+ Env.TeamThreadLimit = readEnvElseMinusOne("OMP_TEAMS_THREAD_LIMIT");
// Default state.
RequiresFlags = OMP_REQ_UNDEFINED;
@@ -1073,18 +1060,18 @@ int32_t __tgt_rtl_init_device(int device_id) {
// Adjust teams to the env variables
- if (DeviceInfo.EnvTeamLimit > 0 &&
+ if (DeviceInfo.Env.TeamLimit > 0 &&
(enforce_upper_bound(&DeviceInfo.GroupsPerDevice[device_id],
- DeviceInfo.EnvTeamLimit))) {
+ DeviceInfo.Env.TeamLimit))) {
DP("Capping max groups per device to OMP_TEAM_LIMIT=%d\n",
- DeviceInfo.EnvTeamLimit);
+ DeviceInfo.Env.TeamLimit);
}
// Set default number of teams
- if (DeviceInfo.EnvNumTeams > 0) {
- DeviceInfo.NumTeams[device_id] = DeviceInfo.EnvNumTeams;
+ if (DeviceInfo.Env.NumTeams > 0) {
+ DeviceInfo.NumTeams[device_id] = DeviceInfo.Env.NumTeams;
DP("Default number of teams set according to environment %d\n",
- DeviceInfo.EnvNumTeams);
+ DeviceInfo.Env.NumTeams);
} else {
char *TeamsPerCUEnvStr = getenv("OMP_TARGET_TEAMS_PER_PROC");
int TeamsPerCU = DefaultTeamsPerCU;
@@ -1105,11 +1092,11 @@ int32_t __tgt_rtl_init_device(int device_id) {
}
// Adjust threads to the env variables
- if (DeviceInfo.EnvTeamThreadLimit > 0 &&
+ if (DeviceInfo.Env.TeamThreadLimit > 0 &&
(enforce_upper_bound(&DeviceInfo.NumThreads[device_id],
- DeviceInfo.EnvTeamThreadLimit))) {
+ DeviceInfo.Env.TeamThreadLimit))) {
DP("Capping max number of threads to OMP_TEAMS_THREAD_LIMIT=%d\n",
- DeviceInfo.EnvTeamThreadLimit);
+ DeviceInfo.Env.TeamThreadLimit);
}
// Set default number of threads
@@ -1880,28 +1867,22 @@ int32_t __tgt_rtl_data_delete(int device_id, void *tgt_ptr) {
return OFFLOAD_SUCCESS;
}
-// Determine launch values for threadsPerGroup and num_groups.
-// Outputs: treadsPerGroup, num_groups
-// Inputs: Max_Teams, Max_WG_Size, Warp_Size, ExecutionMode,
-// EnvTeamLimit, EnvNumTeams, num_teams, thread_limit,
-// loop_tripcount.
+// Determine launch values for kernel.
struct launchVals {
int WorkgroupSize;
int GridSize;
};
-
-launchVals getLaunchVals(int ConstWGSize, int ExecutionMode, int EnvTeamLimit,
- int EnvNumTeams, int num_teams, int thread_limit,
+launchVals getLaunchVals(EnvironmentVariables Env, int ConstWGSize,
+ int ExecutionMode, int num_teams, int thread_limit,
uint64_t loop_tripcount, int DeviceNumTeams) {
int threadsPerGroup = RTLDeviceInfoTy::Default_WG_Size;
int num_groups = 0;
- int Max_Teams = DeviceInfo.EnvMaxTeamsDefault > 0
- ? DeviceInfo.EnvMaxTeamsDefault
- : DeviceNumTeams;
- if (Max_Teams > DeviceInfo.HardTeamLimit)
- Max_Teams = DeviceInfo.HardTeamLimit;
+ int Max_Teams =
+ Env.MaxTeamsDefault > 0 ? Env.MaxTeamsDefault : DeviceNumTeams;
+ if (Max_Teams > RTLDeviceInfoTy::HardTeamLimit)
+ Max_Teams = RTLDeviceInfoTy::HardTeamLimit;
if (print_kernel_trace & STARTUP_DETAILS) {
fprintf(stderr, "RTLDeviceInfoTy::Max_Teams: %d\n",
@@ -1941,10 +1922,8 @@ launchVals getLaunchVals(int ConstWGSize, int ExecutionMode, int EnvTeamLimit,
DP("Preparing %d threads\n", threadsPerGroup);
// Set default num_groups (teams)
- if (DeviceInfo.EnvTeamLimit > 0)
- num_groups = (Max_Teams < DeviceInfo.EnvTeamLimit)
- ? Max_Teams
- : DeviceInfo.EnvTeamLimit;
+ if (Env.TeamLimit > 0)
+ num_groups = (Max_Teams < Env.TeamLimit) ? Max_Teams : Env.TeamLimit;
else
num_groups = Max_Teams;
DP("Set default num of groups %d\n", num_groups);
@@ -1971,19 +1950,16 @@ launchVals getLaunchVals(int ConstWGSize, int ExecutionMode, int EnvTeamLimit,
}
if (print_kernel_trace & STARTUP_DETAILS) {
fprintf(stderr, "num_groups: %d\n", num_groups);
- fprintf(stderr, "DeviceInfo.EnvNumTeams %d\n", DeviceInfo.EnvNumTeams);
- fprintf(stderr, "DeviceInfo.EnvTeamLimit %d\n", DeviceInfo.EnvTeamLimit);
- }
-
- if (DeviceInfo.EnvNumTeams > 0) {
- num_groups = (DeviceInfo.EnvNumTeams < num_groups) ? DeviceInfo.EnvNumTeams
- : num_groups;
- DP("Modifying teams based on EnvNumTeams %d\n", DeviceInfo.EnvNumTeams);
- } else if (DeviceInfo.EnvTeamLimit > 0) {
- num_groups = (DeviceInfo.EnvTeamLimit < num_groups)
- ? DeviceInfo.EnvTeamLimit
- : num_groups;
- DP("Modifying teams based on EnvTeamLimit%d\n", DeviceInfo.EnvTeamLimit);
+ fprintf(stderr, "Env.NumTeams %d\n", Env.NumTeams);
+ fprintf(stderr, "Env.TeamLimit %d\n", Env.TeamLimit);
+ }
+
+ if (Env.NumTeams > 0) {
+ num_groups = (Env.NumTeams < num_groups) ? Env.NumTeams : num_groups;
+ DP("Modifying teams based on Env.NumTeams %d\n", Env.NumTeams);
+ } else if (Env.TeamLimit > 0) {
+ num_groups = (Env.TeamLimit < num_groups) ? Env.TeamLimit : num_groups;
+ DP("Modifying teams based on Env.TeamLimit%d\n", Env.TeamLimit);
} else {
if (num_teams <= 0) {
if (loop_tripcount > 0) {
@@ -2018,9 +1994,8 @@ launchVals getLaunchVals(int ConstWGSize, int ExecutionMode, int EnvTeamLimit,
if (num_teams > 0) {
num_groups = num_teams;
// Cap num_groups to EnvMaxTeamsDefault if set.
- if (DeviceInfo.EnvMaxTeamsDefault > 0 &&
- num_groups > DeviceInfo.EnvMaxTeamsDefault)
- num_groups = DeviceInfo.EnvMaxTeamsDefault;
+ if (Env.MaxTeamsDefault > 0 && num_groups > Env.MaxTeamsDefault)
+ num_groups = Env.MaxTeamsDefault;
}
if (print_kernel_trace & STARTUP_DETAILS) {
fprintf(stderr, "threadsPerGroup: %d\n", threadsPerGroup);
@@ -2111,13 +2086,12 @@ int32_t __tgt_rtl_run_target_team_region_locked(
/*
* Set limit based on ThreadsPerGroup and GroupsPerDevice
*/
- launchVals LV =
- getLaunchVals(KernelInfo->ConstWGSize, KernelInfo->ExecutionMode,
- DeviceInfo.EnvTeamLimit, DeviceInfo.EnvNumTeams,
- num_teams, // From run_region arg
- thread_limit, // From run_region arg
- loop_tripcount, // From run_region arg
- DeviceInfo.NumTeams[KernelInfo->device_id]);
+ launchVals LV = getLaunchVals(DeviceInfo.Env, KernelInfo->ConstWGSize,
+ KernelInfo->ExecutionMode,
+ num_teams, // From run_region arg
+ thread_limit, // From run_region arg
+ loop_tripcount, // From run_region arg
+ DeviceInfo.NumTeams[KernelInfo->device_id]);
const int GridSize = LV.GridSize;
const int WorkgroupSize = LV.WorkgroupSize;
More information about the Openmp-commits
mailing list