[Openmp-commits] [openmp] [OpenMP] Remove optimization skipping reduction struct initialization (PR #65697)
Rodrigo Ceccato de Freitas via Openmp-commits
openmp-commits at lists.llvm.org
Fri Sep 8 10:37:42 PDT 2023
https://github.com/rodrigo-ceccato updated https://github.com/llvm/llvm-project/pull/65697:
>From 5c2588d704a6343635595a1eb52ea05b1b9d4660 Mon Sep 17 00:00:00 2001
From: Rodrigo Ceccato <rodrigoceccatodefreitas at gmail.com>
Date: Tue, 5 Sep 2023 21:18:11 +0000
Subject: [PATCH 1/2] [OpenMP Runtime] Remove optimization skipping reduction
struct initialization
This commit removes an optimization that skips the initialization of the
reduction struct if the number of threads in a team is 1. This optimization
caused a bug with Hidden Helper Threads. When the task group is initially
initialized by the master thread but a Hidden Helper Thread executes a target
nowait region, it requires the reduction struct initialization to properly
accumulate the data.
This commit also adds a LIT test for issue #57522 to ensure that the issue is
properly addressed and that the optimization removal does not introduce any
regressions.
Fixes: #57522
---
.../offloading/task_in_reduction_target.c | 34 +++++++++++++++++++
openmp/runtime/src/kmp_tasking.cpp | 7 +---
2 files changed, 35 insertions(+), 6 deletions(-)
create mode 100644 openmp/libomptarget/test/offloading/task_in_reduction_target.c
diff --git a/openmp/libomptarget/test/offloading/task_in_reduction_target.c b/openmp/libomptarget/test/offloading/task_in_reduction_target.c
new file mode 100644
index 000000000000000..45b426477020d8f
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/task_in_reduction_target.c
@@ -0,0 +1,34 @@
+// RUN: %libomptarget-compile-generic && \
+// RUN: %libomptarget-run-generic
+
+#include <omp.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char *argv[]) {
+
+ int num_devices = omp_get_num_devices();
+
+ // No target device, just return
+ if (num_devices == 0) {
+ printf("PASS\n");
+ return 0;
+ }
+
+ double sum = 999;
+ double A = 311;
+
+#pragma omp taskgroup task_reduction(+ : sum)
+ {
+#pragma omp target map(to : A) in_reduction(+ : sum) device(0) nowait
+ { sum += A; }
+
+#pragma omp target map(to : A) in_reduction(+ : sum) device(1) nowait
+ { sum += A; }
+ }
+
+ printf("PASS\n");
+ return EXIT_SUCCESS;
+}
+
+// CHECK: PASS
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index fefa609927e8933..fcbab2bf5d55b81 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -2512,11 +2512,6 @@ void *__kmp_task_reduction_init(int gtid, int num, T *data) {
KMP_ASSERT(tg != NULL);
KMP_ASSERT(data != NULL);
KMP_ASSERT(num > 0);
- if (nth == 1) {
- KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
- gtid, tg));
- return (void *)tg;
- }
KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
gtid, tg, num));
arr = (kmp_taskred_data_t *)__kmp_thread_malloc(
@@ -2699,6 +2694,7 @@ void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
return p_priv[tid];
}
}
+ KMP_ASSERT(tg->parent != NULL);
tg = tg->parent;
arr = (kmp_taskred_data_t *)(tg->reduce_data);
num = tg->reduce_num_data;
@@ -2711,7 +2707,6 @@ void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
// Called from __kmpc_end_taskgroup()
static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
kmp_int32 nth = th->th.th_team_nproc;
- KMP_DEBUG_ASSERT(nth > 1); // should not be called if nth == 1
kmp_taskred_data_t *arr = (kmp_taskred_data_t *)tg->reduce_data;
kmp_int32 num = tg->reduce_num_data;
for (int i = 0; i < num; ++i) {
>From 75f74e327a8f68b87a9d33c84c537363100b2444 Mon Sep 17 00:00:00 2001
From: Rodrigo Ceccato <rodrigoceccatodefreitas at gmail.com>
Date: Fri, 8 Sep 2023 17:34:27 +0000
Subject: [PATCH 2/2] Apply review suggestions
* Add early return back if nth=1, but skip if hidden helper threads
enabled
* Use KMP_ASSERT(tg->parent) instead of KMP_ASSERT(tg->parent != NULL)
---
openmp/runtime/src/kmp_tasking.cpp | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index fcbab2bf5d55b81..8fd4edf1e4406cc 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -2512,6 +2512,11 @@ void *__kmp_task_reduction_init(int gtid, int num, T *data) {
KMP_ASSERT(tg != NULL);
KMP_ASSERT(data != NULL);
KMP_ASSERT(num > 0);
+ if (nth == 1 && !__kmp_enable_hidden_helper) {
+ KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
+ gtid, tg));
+ return (void *)tg;
+ }
KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
gtid, tg, num));
arr = (kmp_taskred_data_t *)__kmp_thread_malloc(
@@ -2694,7 +2699,7 @@ void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
return p_priv[tid];
}
}
- KMP_ASSERT(tg->parent != NULL);
+ KMP_ASSERT(tg->parent);
tg = tg->parent;
arr = (kmp_taskred_data_t *)(tg->reduce_data);
num = tg->reduce_num_data;
@@ -2707,6 +2712,10 @@ void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
// Called from __kmpc_end_taskgroup()
static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
kmp_int32 nth = th->th.th_team_nproc;
+ KMP_DEBUG_ASSERT(
+ nth > 1 ||
+ __kmp_enable_hidden_helper); // should not be called if nth == 1 unless we
+ // are ussing hidden helper threads
kmp_taskred_data_t *arr = (kmp_taskred_data_t *)tg->reduce_data;
kmp_int32 num = tg->reduce_num_data;
for (int i = 0; i < num; ++i) {
More information about the Openmp-commits
mailing list