[flang-commits] [flang] [flang][DoConcurrent] Map reduction variables as tofrom ByRef for device offloading (PR #189378)

Kareem Ergawy via flang-commits flang-commits at lists.llvm.org
Mon Mar 30 06:27:01 PDT 2026


https://github.com/ergawy created https://github.com/llvm/llvm-project/pull/189378

Scalar reduction variables in `do concurrent reduce(...)` were being mapped with `implicit ByCopy` when offloaded to device, because `genMapInfoOpForLiveIn` treated all trivial types uniformly. This caused the reduction result to be silently dropped — the device-side reduction would compute the correct value but never write it back to the host.

Fix by detecting reduction variables and forcing `implicit tofrom ByRef` mapping, matching the behavior of an explicit `!$omp target teams distribute parallel do reduction(...)`.

Co-authored-by: ergawy <kareem.ergawy at amd.com>
Co-authored-by: Claude <noreply at anthropic.com>
Made-with: Cursor

>From 6b5751966dbed27af7b72bed50b51152b7d88c93 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Mon, 30 Mar 2026 06:19:51 -0500
Subject: [PATCH] [flang][DoConcurrent] Map reduction variables as tofrom ByRef
 for device offloading
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Scalar reduction variables in `do concurrent reduce(...)` were being
mapped with `implicit ByCopy` when offloaded to device, because
`genMapInfoOpForLiveIn` treated all trivial types uniformly. This caused
the reduction result to be silently dropped — the device-side reduction
would compute the correct value but never write it back to the host.

Fix by detecting reduction variables and forcing `implicit tofrom ByRef`
mapping, matching the behavior of explicit
`!$omp target teams distribute parallel do reduction(...)`.

Co-authored-by: ergawy <kareem.ergawy at amd.com>
Co-authored-by: Claude <noreply at anthropic.com>
Made-with: Cursor
---
 .../OpenMP/DoConcurrentConversion.cpp         | 14 ++++--
 .../DoConcurrent/reduce_device.mlir           |  1 +
 .../DoConcurrent/reduce_device_min.f90        | 45 +++++++++++++++++++
 3 files changed, 57 insertions(+), 3 deletions(-)
 create mode 100644 flang/test/Transforms/DoConcurrent/reduce_device_min.f90

diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
index 876a54d29837e..c83cffbb72f06 100644
--- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
+++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
@@ -312,9 +312,13 @@ class DoConcurrentConversion
           rewriter,
           fir::getKindMapping(doLoop->getParentOfType<mlir::ModuleOp>()));
 
+      llvm::SmallDenseSet<mlir::Value> reduceVars(loop.getReduceVars().begin(),
+                                                   loop.getReduceVars().end());
+
       for (mlir::Value liveIn : loopNestLiveIns) {
         targetClauseOps.mapVars.push_back(
-            genMapInfoOpForLiveIn(builder, liveIn));
+            genMapInfoOpForLiveIn(builder, liveIn,
+                                  reduceVars.contains(liveIn)));
         liveInShapeInfoMap.insert(
             {liveIn, TargetDeclareShapeCreationInfo(liveIn)});
       }
@@ -541,7 +545,8 @@ class DoConcurrentConversion
   }
 
   mlir::omp::MapInfoOp genMapInfoOpForLiveIn(fir::FirOpBuilder &builder,
-                                             mlir::Value liveIn) const {
+                                             mlir::Value liveIn,
+                                             bool isReductionVar = false) const {
     mlir::Value rawAddr = liveIn;
     llvm::StringRef name;
 
@@ -574,7 +579,10 @@ class DoConcurrentConversion
     mlir::omp::VariableCaptureKind captureKind =
         mlir::omp::VariableCaptureKind::ByRef;
 
-    if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) {
+    if (isReductionVar) {
+      mapFlag |= mlir::omp::ClauseMapFlags::to;
+      mapFlag |= mlir::omp::ClauseMapFlags::from;
+    } else if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) {
       captureKind = mlir::omp::VariableCaptureKind::ByCopy;
     } else if (!fir::isa_builtin_cptr_type(eleType)) {
       mapFlag |= mlir::omp::ClauseMapFlags::to;
diff --git a/flang/test/Transforms/DoConcurrent/reduce_device.mlir b/flang/test/Transforms/DoConcurrent/reduce_device.mlir
index 3e46692a15dca..c6456fe70dd27 100644
--- a/flang/test/Transforms/DoConcurrent/reduce_device.mlir
+++ b/flang/test/Transforms/DoConcurrent/reduce_device.mlir
@@ -36,6 +36,7 @@ func.func @_QPfoo() {
 
 // CHECK: %[[S_DECL:.*]]:2 = hlfir.declare %6 {uniq_name = "_QFfooEs"}
 // CHECK: %[[S_MAP:.*]] = omp.map.info var_ptr(%[[S_DECL]]#1
+// CHECK-SAME: map_clauses(implicit, tofrom) capture(ByRef)
 
 // CHECK: omp.target host_eval({{.*}}) map_entries({{.*}}, %[[S_MAP]] -> %[[S_TARGET_ARG:.*]] : {{.*}}) {
 // CHECK:   %[[S_DEV_DECL:.*]]:2 = hlfir.declare %[[S_TARGET_ARG]]
diff --git a/flang/test/Transforms/DoConcurrent/reduce_device_min.f90 b/flang/test/Transforms/DoConcurrent/reduce_device_min.f90
new file mode 100644
index 0000000000000..509207c1db2a8
--- /dev/null
+++ b/flang/test/Transforms/DoConcurrent/reduce_device_min.f90
@@ -0,0 +1,45 @@
+! Tests that a `do concurrent reduce(min:...)` on a scalar maps the reduction
+! variable as `tofrom ByRef` (not `ByCopy`) when targeting a device. This is
+! needed so the reduced result is written back from the device to the host.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \
+! RUN:   | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \
+! RUN:   | FileCheck %s
+
+subroutine min_reduce(arr, n, min_val)
+    implicit none
+    integer, intent(in) :: n
+    real, intent(in) :: arr(n)
+    real :: min_val
+    integer :: i
+
+    do concurrent (i=1:n) reduce(min:min_val)
+        min_val = min(min_val, arr(i))
+    end do
+end subroutine min_reduce
+
+! CHECK-DAG: omp.declare_reduction @[[RED_SYM:.*\.omp]] : f32 init
+
+! CHECK-LABEL: func.func @_QPmin_reduce
+
+! CHECK: %[[MIN_VAL_DECL:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %{{.*}} {uniq_name = "_QFmin_reduceEmin_val"}
+
+! Verify the reduction variable is mapped `implicit, tofrom` and captured ByRef (not plain `implicit` with ByCopy).
+! CHECK: %[[MIN_VAL_MAP:.*]] = omp.map.info var_ptr(%[[MIN_VAL_DECL]]#1
+! CHECK-SAME: map_clauses(implicit, tofrom) capture(ByRef)
+! CHECK-SAME: -> !fir.ref<f32> {name = "_QFmin_reduceEmin_val"}
+
+! CHECK: omp.target
+! CHECK-SAME: map_entries({{.*}}%[[MIN_VAL_MAP]] -> %[[MIN_VAL_ARG:[[:alnum:]]+]]{{.*}})
+
+! CHECK: %[[MIN_VAL_DEV:.*]]:2 = hlfir.declare %[[MIN_VAL_ARG]] {{.*}} "_QFmin_reduceEmin_val"
+! CHECK: omp.teams reduction(@[[RED_SYM]] %[[MIN_VAL_DEV]]#0 -> %[[RED_TEAMS:.*]] : !fir.ref<f32>) {
+! CHECK:   omp.parallel {
+! CHECK:     omp.distribute {
+! CHECK:       omp.wsloop reduction(@[[RED_SYM]] %[[RED_TEAMS]] -> %[[RED_WS:.*]] : !fir.ref<f32>) {
+! CHECK:         omp.loop_nest
+! CHECK:       } {omp.composite}
+! CHECK:     } {omp.composite}
+! CHECK:   } {omp.composite}
+! CHECK: }



More information about the flang-commits mailing list