[Openmp-commits] [openmp] [OpenMP] Add Environment Variable to disable Reuse of Blocks for High Loop Trip Counts (PR #89239)
Tim Gymnich via Openmp-commits
openmp-commits at lists.llvm.org
Fri Apr 19 06:35:23 PDT 2024
https://github.com/tgymnich updated https://github.com/llvm/llvm-project/pull/89239
>From 78a9ab8b34436a2ffceb33b9701f057b5f7b1308 Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tgymnich at icloud.com>
Date: Wed, 10 Apr 2024 18:39:02 +0000
Subject: [PATCH 1/7] Add Environment Variable to disable Reuse of Blocks for
high Loop Trip Counts
---
.../plugins-nextgen/common/include/PluginInterface.h | 9 +++++++++
.../plugins-nextgen/common/src/PluginInterface.cpp | 6 +++++-
2 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
index 79e8464bfda5c1..936e090dc07a16 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
@@ -829,6 +829,12 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
return OMPX_MinThreadsForLowTripCount;
}
+ /// Whether or not to reuse blocks for high trip count loops.
+ /// @see OMPX__ReuseBlocksForHighTripCount
+ virtual bool getReuseBlocksForHighTripCount() {
+ return OMPX__ReuseBlocksForHighTripCount;
+ }
+
/// Get the total amount of hardware parallelism supported by the target
/// device. This is the total amount of warps or wavefronts that can be
/// resident on the device simultaneously.
@@ -904,6 +910,9 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
UInt32Envar OMPX_MinThreadsForLowTripCount =
UInt32Envar("LIBOMPTARGET_MIN_THREADS_FOR_LOW_TRIP_COUNT", 32);
+ BoolEnvar OMPX__ReuseBlocksForHighTripCount =
+ BoolEnvar("LIBOMPTARGET_REUSE_BLOCKS_FOR_HIGH_TRIP_COUNT", true);
+
protected:
/// Environment variables defined by the LLVM OpenMP implementation
/// regarding the initial number of streams and events.
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index b5f3c45c835fdb..41542ea1123c29 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
@@ -705,8 +705,12 @@ uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
TripCountNumBlocks = LoopTripCount;
}
}
+
+ uint32_t PreferredNumBlocks = TripCountNumBlocks;
// If the loops are long running we rather reuse blocks than spawn too many.
- uint32_t PreferredNumBlocks = std::min(TripCountNumBlocks, DefaultNumBlocks);
+ if (GenericDevice.getReuseBlocksForHighTripCount()) {
+ PreferredNumBlocks = std::min(TripCountNumBlocks, DefaultNumBlocks);
+ }
return std::min(PreferredNumBlocks, GenericDevice.getBlockLimit());
}
>From b0885ee6cd7bf6188abb2c63a9de968285a5d933 Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tim at gymni.ch>
Date: Fri, 19 Apr 2024 14:08:56 +0200
Subject: [PATCH 2/7] Update
openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
Co-authored-by: Joseph Huber <huberjn at outlook.com>
---
.../plugins-nextgen/common/src/PluginInterface.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index 41542ea1123c29..d88ba8a47d2708 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
@@ -708,9 +708,8 @@ uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
uint32_t PreferredNumBlocks = TripCountNumBlocks;
// If the loops are long running we rather reuse blocks than spawn too many.
- if (GenericDevice.getReuseBlocksForHighTripCount()) {
+ if (GenericDevice.getReuseBlocksForHighTripCount())
PreferredNumBlocks = std::min(TripCountNumBlocks, DefaultNumBlocks);
- }
return std::min(PreferredNumBlocks, GenericDevice.getBlockLimit());
}
>From df7ef8429ddaad2ca5bfcaa3146d15fd5e006fd4 Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tgymnich at icloud.com>
Date: Fri, 19 Apr 2024 14:10:05 +0200
Subject: [PATCH 3/7] remove _ and virtual
---
.../plugins-nextgen/common/include/PluginInterface.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
index 936e090dc07a16..f7d0e1ccacb9ce 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
@@ -830,9 +830,9 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
}
/// Whether or not to reuse blocks for high trip count loops.
- /// @see OMPX__ReuseBlocksForHighTripCount
- virtual bool getReuseBlocksForHighTripCount() {
- return OMPX__ReuseBlocksForHighTripCount;
+ /// @see OMPX_ReuseBlocksForHighTripCount
+ bool getReuseBlocksForHighTripCount() {
+ return OMPX_ReuseBlocksForHighTripCount;
}
/// Get the total amount of hardware parallelism supported by the target
>From 493c1dcfd1c01019f6d5f20521c3bbe218c81703 Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tgymnich at icloud.com>
Date: Fri, 19 Apr 2024 14:29:21 +0200
Subject: [PATCH 4/7] add test
---
.../high_trip_count_block_limit.cpp | 23 +++++++++++++++++++
1 file changed, 23 insertions(+)
create mode 100644 openmp/libomptarget/test/offloading/high_trip_count_block_limit.cpp
diff --git a/openmp/libomptarget/test/offloading/high_trip_count_block_limit.cpp b/openmp/libomptarget/test/offloading/high_trip_count_block_limit.cpp
new file mode 100644
index 00000000000000..dba6e4a14ecb6b
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/high_trip_count_block_limit.cpp
@@ -0,0 +1,23 @@
+// RUN: %libomptarget-compile-generic && env OMPX_ReuseBlocksForHighTripCount=False %libomptarget-run-generic 2>&1 | %fcheck-generic -allow-empty -check-prefix=DEBUG
+// REQUIRES: libomptarget-debug
+
+/*
+ Check if one thread only runs one iteration of the loop
+*/
+#include <omp.h>
+#include <stdio.h>
+#include <assert.h>
+
+int main() {
+ int N = 819200;
+
+ printf("#pragma omp target teams distribute parallel for\n");
+#pragma omp target teams distribute parallel for
+ for (int j = 0; j < N; j++) {
+ int gtid = omp_get_thread_num() + omp_get_team_nun() * omp_get_num_teams();
+ assert(gtid == j);
+ }
+
+ // CHECK: PASS
+ printf("PASS\n");
+}
>From 2d02723de82807f83fbb2e81f9a6689610c71c69 Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tgymnich at icloud.com>
Date: Fri, 19 Apr 2024 12:42:58 +0000
Subject: [PATCH 5/7] remove _
---
.../plugins-nextgen/common/include/PluginInterface.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
index f7d0e1ccacb9ce..1b7a0ca2136e3d 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
@@ -910,7 +910,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
UInt32Envar OMPX_MinThreadsForLowTripCount =
UInt32Envar("LIBOMPTARGET_MIN_THREADS_FOR_LOW_TRIP_COUNT", 32);
- BoolEnvar OMPX__ReuseBlocksForHighTripCount =
+ BoolEnvar OMPX_ReuseBlocksForHighTripCount =
BoolEnvar("LIBOMPTARGET_REUSE_BLOCKS_FOR_HIGH_TRIP_COUNT", true);
protected:
>From 6d39bf64e1d5c43fd8eda33265668410a843b147 Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tgymnich at icloud.com>
Date: Fri, 19 Apr 2024 13:33:15 +0000
Subject: [PATCH 6/7] fix test
---
.../high_trip_count_block_limit.cpp | 26 ++++++++++++-------
1 file changed, 17 insertions(+), 9 deletions(-)
diff --git a/openmp/libomptarget/test/offloading/high_trip_count_block_limit.cpp b/openmp/libomptarget/test/offloading/high_trip_count_block_limit.cpp
index dba6e4a14ecb6b..c1d3768fbf82fc 100644
--- a/openmp/libomptarget/test/offloading/high_trip_count_block_limit.cpp
+++ b/openmp/libomptarget/test/offloading/high_trip_count_block_limit.cpp
@@ -1,23 +1,31 @@
-// RUN: %libomptarget-compile-generic && env OMPX_ReuseBlocksForHighTripCount=False %libomptarget-run-generic 2>&1 | %fcheck-generic -allow-empty -check-prefix=DEBUG
-// REQUIRES: libomptarget-debug
+// RUN: %libomptarget-compilexx-generic && env LIBOMPTARGET_REUSE_BLOCKS_FOR_HIGH_TRIP_COUNT=False %libomptarget-run-generic 2>&1 | %fcheck-generic
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
+
/*
- Check if one thread only runs one iteration of the loop
+ Check if there is a thread for each loop iteration
*/
#include <omp.h>
#include <stdio.h>
-#include <assert.h>
int main() {
int N = 819200;
+ int num_threads[N];
- printf("#pragma omp target teams distribute parallel for\n");
-#pragma omp target teams distribute parallel for
+ #pragma omp target teams distribute parallel for
for (int j = 0; j < N; j++) {
- int gtid = omp_get_thread_num() + omp_get_team_nun() * omp_get_num_teams();
- assert(gtid == j);
+ num_threads[j] = omp_get_num_threads() * omp_get_num_teams();
}
// CHECK: PASS
- printf("PASS\n");
+ if (num_threads[0] == N)
+ printf("PASS\n");
+ else
+ printf("FAIL: num_threads: %d\n != N: %d", num_threads[0], N);
+ return 0;
}
>From 59c6931ffbd244096cc84dd72b29a8c70002344b Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tgymnich at icloud.com>
Date: Fri, 19 Apr 2024 13:35:05 +0000
Subject: [PATCH 7/7] clang-format
---
.../offloading/high_trip_count_block_limit.cpp | 18 ++++++++----------
1 file changed, 8 insertions(+), 10 deletions(-)
diff --git a/openmp/libomptarget/test/offloading/high_trip_count_block_limit.cpp b/openmp/libomptarget/test/offloading/high_trip_count_block_limit.cpp
index c1d3768fbf82fc..1972188c93e92a 100644
--- a/openmp/libomptarget/test/offloading/high_trip_count_block_limit.cpp
+++ b/openmp/libomptarget/test/offloading/high_trip_count_block_limit.cpp
@@ -1,11 +1,9 @@
-// RUN: %libomptarget-compilexx-generic && env LIBOMPTARGET_REUSE_BLOCKS_FOR_HIGH_TRIP_COUNT=False %libomptarget-run-generic 2>&1 | %fcheck-generic
-// UNSUPPORTED: aarch64-unknown-linux-gnu
-// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
-// UNSUPPORTED: x86_64-pc-linux-gnu
-// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
-// UNSUPPORTED: s390x-ibm-linux-gnu
-// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
-
+// RUN: %libomptarget-compilexx-generic && env
+// LIBOMPTARGET_REUSE_BLOCKS_FOR_HIGH_TRIP_COUNT=False %libomptarget-run-generic
+// 2>&1 | %fcheck-generic UNSUPPORTED: aarch64-unknown-linux-gnu UNSUPPORTED:
+// aarch64-unknown-linux-gnu-LTO UNSUPPORTED: x86_64-pc-linux-gnu UNSUPPORTED:
+// x86_64-pc-linux-gnu-LTO UNSUPPORTED: s390x-ibm-linux-gnu UNSUPPORTED:
+// s390x-ibm-linux-gnu-LTO
/*
Check if there is a thread for each loop iteration
@@ -17,9 +15,9 @@ int main() {
int N = 819200;
int num_threads[N];
- #pragma omp target teams distribute parallel for
+#pragma omp target teams distribute parallel for
for (int j = 0; j < N; j++) {
- num_threads[j] = omp_get_num_threads() * omp_get_num_teams();
+ num_threads[j] = omp_get_num_threads() * omp_get_num_teams();
}
// CHECK: PASS
More information about the Openmp-commits mailing list