[Openmp-commits] [openmp] 422adaa - [OpenMP] Add thread limit environment variable support to plugins
via Openmp-commits
openmp-commits at lists.llvm.org
Tue Jun 22 13:25:50 PDT 2021
Author: Joseph Huber
Date: 2021-06-22T16:25:40-04:00
New Revision: 422adaa879b2de5a682eaed1a4f7cf86e9ea12b4
URL: https://github.com/llvm/llvm-project/commit/422adaa879b2de5a682eaed1a4f7cf86e9ea12b4
DIFF: https://github.com/llvm/llvm-project/commit/422adaa879b2de5a682eaed1a4f7cf86e9ea12b4.diff
LOG: [OpenMP] Add thread limit environment variable support to plugins
The OpenMP 5.1 standard defines the environment variable
`OMP_TEAMS_THREAD_LIMIT`, which limits the number of threads that may run in a
single block. This patch adds support for that variable to the AMDGPU and CUDA
plugins.
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D103923
Added:
Modified:
openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
openmp/libomptarget/plugins/cuda/src/rtl.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index 8a4e12cff549f..012bd1f9b7010 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -405,6 +405,7 @@ class RTLDeviceInfoTy {
// OpenMP Environment properties
int EnvNumTeams;
int EnvTeamLimit;
+ int EnvTeamThreadLimit;
int EnvMaxTeamsDefault;
// OpenMP Requires Flags
@@ -645,6 +646,13 @@ class RTLDeviceInfoTy {
} else {
EnvMaxTeamsDefault = -1;
}
+ envStr = getenv("OMP_TEAMS_THREAD_LIMIT");
+ if (envStr) {
+ EnvTeamThreadLimit = std::stoi(envStr);
+ DP("Parsed OMP_TEAMS_THREAD_LIMIT=%d\n", EnvTeamThreadLimit);
+ } else {
+ EnvTeamThreadLimit = -1;
+ }
// Default state.
RequiresFlags = OMP_REQ_UNDEFINED;
@@ -950,6 +958,14 @@ int32_t __tgt_rtl_init_device(int device_id) {
DeviceInfo.GroupsPerDevice[device_id]);
}
+ // Adjust threads to the env variables
+ if (DeviceInfo.EnvTeamThreadLimit > 0 &&
+ (enforce_upper_bound(&DeviceInfo.NumThreads[device_id],
+ DeviceInfo.EnvTeamThreadLimit))) {
+ DP("Capping max number of threads to OMP_TEAMS_THREAD_LIMIT=%d\n",
+ DeviceInfo.EnvTeamThreadLimit);
+ }
+
// Set default number of threads
DeviceInfo.NumThreads[device_id] = RTLDeviceInfoTy::Default_WG_Size;
DP("Default number of threads set according to library's default %d\n",
diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
index e8fe63736b939..7b04bc9180e5b 100644
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -281,6 +281,7 @@ class DeviceRTLTy {
// OpenMP environment properties
int EnvNumTeams;
int EnvTeamLimit;
+ int EnvTeamThreadLimit;
// OpenMP requires flags
int64_t RequiresFlags;
@@ -436,7 +437,7 @@ class DeviceRTLTy {
DeviceRTLTy()
: NumberOfDevices(0), EnvNumTeams(-1), EnvTeamLimit(-1),
- RequiresFlags(OMP_REQ_UNDEFINED) {
+ EnvTeamThreadLimit(-1), RequiresFlags(OMP_REQ_UNDEFINED) {
DP("Start initializing CUDA\n");
@@ -467,6 +468,11 @@ class DeviceRTLTy {
EnvTeamLimit = std::stoi(EnvStr);
DP("Parsed OMP_TEAM_LIMIT=%d\n", EnvTeamLimit);
}
+ if (const char *EnvStr = getenv("OMP_TEAMS_THREAD_LIMIT")) {
+ // OMP_TEAMS_THREAD_LIMIT has been set
+ EnvTeamThreadLimit = std::stoi(EnvStr);
+ DP("Parsed OMP_TEAMS_THREAD_LIMIT=%d\n", EnvTeamThreadLimit);
+ }
if (const char *EnvStr = getenv("OMP_NUM_TEAMS")) {
// OMP_NUM_TEAMS has been set
EnvNumTeams = std::stoi(EnvStr);
@@ -596,14 +602,23 @@ class DeviceRTLTy {
DP("Error getting max block dimension, use default value %d\n",
DeviceRTLTy::DefaultNumThreads);
DeviceData[DeviceId].ThreadsPerBlock = DeviceRTLTy::DefaultNumThreads;
- } else if (MaxBlockDimX <= DeviceRTLTy::HardThreadLimit) {
+ } else {
DP("Using %d CUDA threads per block\n", MaxBlockDimX);
DeviceData[DeviceId].ThreadsPerBlock = MaxBlockDimX;
- } else {
- DP("Max CUDA threads per block %d exceeds the hard thread limit %d, "
- "capping at the hard limit\n",
- MaxBlockDimX, DeviceRTLTy::HardThreadLimit);
- DeviceData[DeviceId].ThreadsPerBlock = DeviceRTLTy::HardThreadLimit;
+
+ if (EnvTeamThreadLimit > 0 &&
+ DeviceData[DeviceId].ThreadsPerBlock > EnvTeamThreadLimit) {
+ DP("Max CUDA threads per block %d exceeds the thread limit %d set by "
+ "OMP_TEAMS_THREAD_LIMIT, capping at the limit\n",
+ DeviceData[DeviceId].ThreadsPerBlock, EnvTeamThreadLimit);
+ DeviceData[DeviceId].ThreadsPerBlock = EnvTeamThreadLimit;
+ }
+ if (DeviceData[DeviceId].ThreadsPerBlock > DeviceRTLTy::HardThreadLimit) {
+ DP("Max CUDA threads per block %d exceeds the hard thread limit %d, "
+ "capping at the hard limit\n",
+ DeviceData[DeviceId].ThreadsPerBlock, DeviceRTLTy::HardThreadLimit);
+ DeviceData[DeviceId].ThreadsPerBlock = DeviceRTLTy::HardThreadLimit;
+ }
}
// Get and set warp size
More information about the Openmp-commits
mailing list