[Openmp-commits] [openmp] [OpenMP] Remove optimization skipping reduction struct initialization (PR #65697)

Rodrigo Ceccato de Freitas via Openmp-commits openmp-commits at lists.llvm.org
Fri Sep 8 10:37:42 PDT 2023


https://github.com/rodrigo-ceccato updated https://github.com/llvm/llvm-project/pull/65697:

>From 5c2588d704a6343635595a1eb52ea05b1b9d4660 Mon Sep 17 00:00:00 2001
From: Rodrigo Ceccato <rodrigoceccatodefreitas at gmail.com>
Date: Tue, 5 Sep 2023 21:18:11 +0000
Subject: [PATCH 1/2] [OpenMP Runtime] Remove optimization skipping reduction
 struct initialization

This commit removes an optimization that skips the initialization of the
reduction struct if the number of threads in a team is 1. This optimization
caused a bug with Hidden Helper Threads. When the task group is initially
initialized by the master thread but a Hidden Helper Thread executes a target
nowait region, it requires the reduction struct initialization to properly
accumulate the data.

This commit also adds a LIT test for issue #57522 to ensure that the issue is
properly addressed and that the optimization removal does not introduce any
regressions.

Fixes: #57522
---
 .../offloading/task_in_reduction_target.c     | 34 +++++++++++++++++++
 openmp/runtime/src/kmp_tasking.cpp            |  7 +---
 2 files changed, 35 insertions(+), 6 deletions(-)
 create mode 100644 openmp/libomptarget/test/offloading/task_in_reduction_target.c

diff --git a/openmp/libomptarget/test/offloading/task_in_reduction_target.c b/openmp/libomptarget/test/offloading/task_in_reduction_target.c
new file mode 100644
index 000000000000000..45b426477020d8f
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/task_in_reduction_target.c
@@ -0,0 +1,34 @@
+// RUN: %libomptarget-compile-generic && \
+// RUN: %libomptarget-run-generic
+
+#include <omp.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char *argv[]) {
+
+  int num_devices = omp_get_num_devices();
+
+  // No target device, just return
+  if (num_devices == 0) {
+    printf("PASS\n");
+    return 0;
+  }
+
+  double sum = 999;
+  double A = 311;
+
+#pragma omp taskgroup task_reduction(+ : sum)
+  {
+#pragma omp target map(to : A) in_reduction(+ : sum) device(0) nowait
+    { sum += A; }
+
+#pragma omp target map(to : A) in_reduction(+ : sum) device(1) nowait
+    { sum += A; }
+  }
+
+  printf("PASS\n");
+  return EXIT_SUCCESS;
+}
+
+// CHECK: PASS
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index fefa609927e8933..fcbab2bf5d55b81 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -2512,11 +2512,6 @@ void *__kmp_task_reduction_init(int gtid, int num, T *data) {
   KMP_ASSERT(tg != NULL);
   KMP_ASSERT(data != NULL);
   KMP_ASSERT(num > 0);
-  if (nth == 1) {
-    KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
-                  gtid, tg));
-    return (void *)tg;
-  }
   KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
                 gtid, tg, num));
   arr = (kmp_taskred_data_t *)__kmp_thread_malloc(
@@ -2699,6 +2694,7 @@ void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
         return p_priv[tid];
       }
     }
+    KMP_ASSERT(tg->parent != NULL);
     tg = tg->parent;
     arr = (kmp_taskred_data_t *)(tg->reduce_data);
     num = tg->reduce_num_data;
@@ -2711,7 +2707,6 @@ void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
 // Called from __kmpc_end_taskgroup()
 static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
   kmp_int32 nth = th->th.th_team_nproc;
-  KMP_DEBUG_ASSERT(nth > 1); // should not be called if nth == 1
   kmp_taskred_data_t *arr = (kmp_taskred_data_t *)tg->reduce_data;
   kmp_int32 num = tg->reduce_num_data;
   for (int i = 0; i < num; ++i) {

>From 75f74e327a8f68b87a9d33c84c537363100b2444 Mon Sep 17 00:00:00 2001
From: Rodrigo Ceccato <rodrigoceccatodefreitas at gmail.com>
Date: Fri, 8 Sep 2023 17:34:27 +0000
Subject: [PATCH 2/2] Apply review suggestions

* Add early return back if nth=1, but skip if hidden helper threads
  enabled

* Use KMP_ASSERT(tg->parent) instead of KMP_ASSERT(tg->parent != NULL)
---
 openmp/runtime/src/kmp_tasking.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index fcbab2bf5d55b81..8fd4edf1e4406cc 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -2512,6 +2512,11 @@ void *__kmp_task_reduction_init(int gtid, int num, T *data) {
   KMP_ASSERT(tg != NULL);
   KMP_ASSERT(data != NULL);
   KMP_ASSERT(num > 0);
+  if (nth == 1 && !__kmp_enable_hidden_helper) {
+    KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
+                  gtid, tg));
+    return (void *)tg;
+  }
   KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
                 gtid, tg, num));
   arr = (kmp_taskred_data_t *)__kmp_thread_malloc(
@@ -2694,7 +2699,7 @@ void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
         return p_priv[tid];
       }
     }
-    KMP_ASSERT(tg->parent != NULL);
+    KMP_ASSERT(tg->parent);
     tg = tg->parent;
     arr = (kmp_taskred_data_t *)(tg->reduce_data);
     num = tg->reduce_num_data;
@@ -2707,6 +2712,10 @@ void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
 // Called from __kmpc_end_taskgroup()
 static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
   kmp_int32 nth = th->th.th_team_nproc;
+  KMP_DEBUG_ASSERT(
+      nth > 1 ||
+      __kmp_enable_hidden_helper); // should not be called if nth == 1 unless we
+                                   // are ussing hidden helper threads
   kmp_taskred_data_t *arr = (kmp_taskred_data_t *)tg->reduce_data;
   kmp_int32 num = tg->reduce_num_data;
   for (int i = 0; i < num; ++i) {



More information about the Openmp-commits mailing list