[clang] [flang] [llvm] [mlir] [OpenMP][flang] Add initial support for by-ref reductions on the GPU (PR #165714)

via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 30 06:02:32 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-flang-openmp

Author: Kareem Ergawy (ergawy)

<details>
<summary>Changes</summary>

Adds initial support for GPU by-ref reductions. In particular, this diff adds support for reductions on scalar allocatables where reductions happen on loops nested in `target` regions. For example:

```fortran
  integer :: i
  real, allocatable :: scalar_alloc

  allocate(scalar_alloc)
  scalar_alloc = 0

  !$omp target map(tofrom: scalar_alloc)
  !$omp parallel do reduction(+: scalar_alloc)
  do i = 1, 1000000
    scalar_alloc = scalar_alloc + 1
  end do
  !$omp end target
```

This PR supports by-ref reductions on the intra- and inter-warp levels.

So far, there are still steps to be takens for full support of by-ref reductions, for example:
* Support inter-block value combination is still not supported. Therefore, `target teams distribute parallel do` is still not supported.
* Support for dynamically-sized arrays still needs to be added.
* Support for more than one allocatable/array on the same `reduction` clause.

---

Patch is 54.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/165714.diff


34 Files Affected:

- (modified) clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp (+2-2) 
- (modified) flang/include/flang/Optimizer/Dialect/FIROps.td (+2-1) 
- (modified) flang/lib/Lower/Support/ReductionProcessor.cpp (+6-1) 
- (modified) flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp (+2-1) 
- (modified) flang/test/Lower/OpenMP/delayed-privatization-reduction-byref.f90 (+1-1) 
- (modified) flang/test/Lower/OpenMP/parallel-reduction-allocatable-array.f90 (+1-1) 
- (modified) flang/test/Lower/OpenMP/parallel-reduction-array-lb.f90 (+1-1) 
- (modified) flang/test/Lower/OpenMP/parallel-reduction-array.f90 (+1-1) 
- (modified) flang/test/Lower/OpenMP/parallel-reduction-array2.f90 (+1-1) 
- (modified) flang/test/Lower/OpenMP/parallel-reduction-pointer-array.f90 (+1-1) 
- (modified) flang/test/Lower/OpenMP/parallel-reduction3.f90 (+1-1) 
- (modified) flang/test/Lower/OpenMP/reduction-array-intrinsic.f90 (+1-1) 
- (modified) flang/test/Lower/OpenMP/sections-array-reduction.f90 (+1-1) 
- (modified) flang/test/Lower/OpenMP/taskgroup-task-array-reduction.f90 (+1-1) 
- (modified) flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90 (+2-2) 
- (modified) flang/test/Lower/OpenMP/wsloop-reduction-allocatable.f90 (+1-1) 
- (modified) flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 (+1-1) 
- (modified) flang/test/Lower/OpenMP/wsloop-reduction-array-lb.f90 (+1-1) 
- (modified) flang/test/Lower/OpenMP/wsloop-reduction-array-lb2.f90 (+1-1) 
- (modified) flang/test/Lower/OpenMP/wsloop-reduction-array.f90 (+1-1) 
- (modified) flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 (+1-1) 
- (modified) flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90 (+1-1) 
- (modified) flang/test/Lower/OpenMP/wsloop-reduction-pointer.f90 (+1-1) 
- (modified) flang/test/Lower/do_concurrent_reduce_allocatable.f90 (+1-1) 
- (modified) llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h (+17-7) 
- (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+119-35) 
- (modified) mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td (+3-1) 
- (modified) mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp (+2-1) 
- (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+20-4) 
- (added) mlir/test/Target/LLVMIR/allocatable_gpu_reduction.mlir (+92) 
- (modified) mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir (+2-4) 
- (modified) mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir (+4-4) 
- (modified) mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir (+1-1) 


``````````diff
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index fddeba98adccc..9a8c75073aa4c 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -1784,8 +1784,8 @@ void CGOpenMPRuntimeGPU::emitReduction(
 
   llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
       cantFail(OMPBuilder.createReductionsGPU(
-          OmpLoc, AllocaIP, CodeGenIP, ReductionInfos, false, TeamsReduction,
-          llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang,
+          OmpLoc, AllocaIP, CodeGenIP, ReductionInfos, {}, false,
+          TeamsReduction, llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang,
           CGF.getTarget().getGridValue(),
           C.getLangOpts().OpenMPCUDAReductionBufNum, RTLoc));
   CGF.Builder.restoreIP(AfterIP);
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 58a317cf5d691..ff4dab1136ee9 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -3743,7 +3743,8 @@ def fir_DeclareReductionOp : fir_Op<"declare_reduction", [IsolatedFromAbove,
   }];
 
   let arguments = (ins SymbolNameAttr:$sym_name,
-                       TypeAttr:$type);
+                       TypeAttr:$type,
+                       OptionalAttr<TypeAttr>:$byref_element_type);
 
   let regions = (region MaxSizedRegion<1>:$allocRegion,
                         AnyRegion:$initializerRegion,
diff --git a/flang/lib/Lower/Support/ReductionProcessor.cpp b/flang/lib/Lower/Support/ReductionProcessor.cpp
index 605a5b6b20b94..e02cd8fac823b 100644
--- a/flang/lib/Lower/Support/ReductionProcessor.cpp
+++ b/flang/lib/Lower/Support/ReductionProcessor.cpp
@@ -573,10 +573,15 @@ OpType ReductionProcessor::createDeclareReduction(
 
   mlir::OpBuilder modBuilder(module.getBodyRegion());
   mlir::Type valTy = fir::unwrapRefType(type);
+  mlir::TypeAttr boxedTy{};
+
   if (!isByRef)
     type = valTy;
 
-  decl = OpType::create(modBuilder, loc, reductionOpName, type);
+  if (isByRef)
+    boxedTy = mlir::TypeAttr::get(fir::unwrapPassByRefType(valTy));
+
+  decl = OpType::create(modBuilder, loc, reductionOpName, type, boxedTy);
   createReductionAllocAndInitRegions(converter, loc, decl, redId, type,
                                      isByRef);
 
diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
index 1229018bd9b3e..11609ea7b6040 100644
--- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
+++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
@@ -851,7 +851,8 @@ class DoConcurrentConversion
         if (!ompReducer) {
           ompReducer = mlir::omp::DeclareReductionOp::create(
               rewriter, firReducer.getLoc(), ompReducerName,
-              firReducer.getTypeAttr().getValue());
+              firReducer.getTypeAttr().getValue(),
+              firReducer.getByrefElementTypeAttr());
 
           cloneFIRRegionToOMP(rewriter, firReducer.getAllocRegion(),
                               ompReducer.getAllocRegion());
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-reduction-byref.f90 b/flang/test/Lower/OpenMP/delayed-privatization-reduction-byref.f90
index 4b6a643f94059..4c7b6ac5f5f9b 100644
--- a/flang/test/Lower/OpenMP/delayed-privatization-reduction-byref.f90
+++ b/flang/test/Lower/OpenMP/delayed-privatization-reduction-byref.f90
@@ -22,7 +22,7 @@ subroutine red_and_delayed_private
 ! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : i32
 
 ! CHECK-LABEL: omp.declare_reduction
-! CHECK-SAME: @[[REDUCTION_SYM:.*]] : !fir.ref<i32> alloc
+! CHECK-SAME: @[[REDUCTION_SYM:.*]] : !fir.ref<i32> attributes {byref_element_type = i32} alloc
 
 ! CHECK-LABEL: _QPred_and_delayed_private
 ! CHECK: omp.parallel
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-allocatable-array.f90 b/flang/test/Lower/OpenMP/parallel-reduction-allocatable-array.f90
index 41c7d69ebb3ba..f56875dcb518b 100644
--- a/flang/test/Lower/OpenMP/parallel-reduction-allocatable-array.f90
+++ b/flang/test/Lower/OpenMP/parallel-reduction-allocatable-array.f90
@@ -18,7 +18,7 @@ program reduce
 
 end program
 
-! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> alloc {
+! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> attributes {byref_element_type = !fir.array<?xi32>} alloc {
 ! CHECK:           %[[VAL_10:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
 ! CHECK:           omp.yield(%[[VAL_10]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
 ! CHECK-LABEL:   } init {
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-array-lb.f90 b/flang/test/Lower/OpenMP/parallel-reduction-array-lb.f90
index aa91e1e0e8b15..d9ba3bed464f8 100644
--- a/flang/test/Lower/OpenMP/parallel-reduction-array-lb.f90
+++ b/flang/test/Lower/OpenMP/parallel-reduction-array-lb.f90
@@ -12,7 +12,7 @@ program reduce
 
 end program
 
-! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_3x2xi32 : !fir.ref<!fir.box<!fir.array<3x2xi32>>> alloc {
+! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_3x2xi32 : !fir.ref<!fir.box<!fir.array<3x2xi32>>> {{.*}} alloc {
 ! CHECK:           %[[VAL_15:.*]] = fir.alloca !fir.box<!fir.array<3x2xi32>>
 ! CHECK:           omp.yield(%[[VAL_15]] : !fir.ref<!fir.box<!fir.array<3x2xi32>>>)
 ! CHECK-LABEL:   } init {
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-array.f90 b/flang/test/Lower/OpenMP/parallel-reduction-array.f90
index 59595de338d50..636660f279e85 100644
--- a/flang/test/Lower/OpenMP/parallel-reduction-array.f90
+++ b/flang/test/Lower/OpenMP/parallel-reduction-array.f90
@@ -17,7 +17,7 @@ program reduce
 print *,i
 end program
 
-! CPU-LABEL:   omp.declare_reduction @add_reduction_byref_box_3xi32 : !fir.ref<!fir.box<!fir.array<3xi32>>> alloc {
+! CPU-LABEL:   omp.declare_reduction @add_reduction_byref_box_3xi32 : !fir.ref<!fir.box<!fir.array<3xi32>>> attributes {byref_element_type = !fir.array<3xi32>} alloc {
 ! CPU:           %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<3xi32>>
 ! CPU:           omp.yield(%[[VAL_8]] : !fir.ref<!fir.box<!fir.array<3xi32>>>)
 ! CPU-LABEL:   } init {
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-array2.f90 b/flang/test/Lower/OpenMP/parallel-reduction-array2.f90
index 14338c6f50817..9cf8a63427ed1 100644
--- a/flang/test/Lower/OpenMP/parallel-reduction-array2.f90
+++ b/flang/test/Lower/OpenMP/parallel-reduction-array2.f90
@@ -13,7 +13,7 @@ program reduce
 print *,i
 end program
 
-! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_3xi32 : !fir.ref<!fir.box<!fir.array<3xi32>>> alloc {
+! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_3xi32 : !fir.ref<!fir.box<!fir.array<3xi32>>> {{.*}} alloc {
 ! CHECK:           %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<3xi32>>
 ! CHECK:           omp.yield(%[[VAL_8]] : !fir.ref<!fir.box<!fir.array<3xi32>>>)
 ! CHECK-LABEL:   } init {
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-pointer-array.f90 b/flang/test/Lower/OpenMP/parallel-reduction-pointer-array.f90
index 36344458d1cae..3de2ba8f61f8e 100644
--- a/flang/test/Lower/OpenMP/parallel-reduction-pointer-array.f90
+++ b/flang/test/Lower/OpenMP/parallel-reduction-pointer-array.f90
@@ -19,7 +19,7 @@ program reduce
 
 end program
 
-! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_ptr_Uxi32 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> alloc {
+! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_ptr_Uxi32 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> attributes {byref_element_type = !fir.array<?xi32>} alloc {
 ! CHECK:           %[[VAL_3:.*]] = fir.alloca !fir.box<!fir.ptr<!fir.array<?xi32>>>
 ! CHECK:           omp.yield(%[[VAL_3]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>)
 ! CHECK-LABEL:   } init {
diff --git a/flang/test/Lower/OpenMP/parallel-reduction3.f90 b/flang/test/Lower/OpenMP/parallel-reduction3.f90
index 9af18378f0ae0..da337378862be 100644
--- a/flang/test/Lower/OpenMP/parallel-reduction3.f90
+++ b/flang/test/Lower/OpenMP/parallel-reduction3.f90
@@ -1,7 +1,7 @@
 ! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
 ! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
 
-! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_Uxi32 : !fir.ref<!fir.box<!fir.array<?xi32>>> alloc {
+! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_Uxi32 : !fir.ref<!fir.box<!fir.array<?xi32>>> {{.*}} alloc {
 ! CHECK:           %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
 ! CHECK:           omp.yield(%[[VAL_8]] : !fir.ref<!fir.box<!fir.array<?xi32>>>)
 ! CHECK-LABEL:   } init {
diff --git a/flang/test/Lower/OpenMP/reduction-array-intrinsic.f90 b/flang/test/Lower/OpenMP/reduction-array-intrinsic.f90
index 8b94d51f986f5..4a0593ff9eca4 100644
--- a/flang/test/Lower/OpenMP/reduction-array-intrinsic.f90
+++ b/flang/test/Lower/OpenMP/reduction-array-intrinsic.f90
@@ -9,7 +9,7 @@ subroutine max_array_reduction(l, r)
   !$omp end parallel
 end subroutine
 
-! CHECK-LABEL:   omp.declare_reduction @max_byref_box_Uxi32 : !fir.ref<!fir.box<!fir.array<?xi32>>> alloc {
+! CHECK-LABEL:   omp.declare_reduction @max_byref_box_Uxi32 : !fir.ref<!fir.box<!fir.array<?xi32>>> {{.*}} alloc {
 ! CHECK:           %[[VAL_3:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
 ! CHECK:           omp.yield(%[[VAL_3]] : !fir.ref<!fir.box<!fir.array<?xi32>>>)
 ! CHECK-LABEL:   } init {
diff --git a/flang/test/Lower/OpenMP/sections-array-reduction.f90 b/flang/test/Lower/OpenMP/sections-array-reduction.f90
index 2f2808cebfc0c..0dbe9e3673395 100644
--- a/flang/test/Lower/OpenMP/sections-array-reduction.f90
+++ b/flang/test/Lower/OpenMP/sections-array-reduction.f90
@@ -14,7 +14,7 @@ subroutine sectionsReduction(x)
 end subroutine
 
 
-! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_Uxf32 : !fir.ref<!fir.box<!fir.array<?xf32>>> alloc {
+! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_Uxf32 : !fir.ref<!fir.box<!fir.array<?xf32>>> {{.*}} alloc {
 ! [...]
 ! CHECK:           omp.yield
 ! CHECK-LABEL:   } init {
diff --git a/flang/test/Lower/OpenMP/taskgroup-task-array-reduction.f90 b/flang/test/Lower/OpenMP/taskgroup-task-array-reduction.f90
index 18a4f75b86309..3a63bb09c59de 100644
--- a/flang/test/Lower/OpenMP/taskgroup-task-array-reduction.f90
+++ b/flang/test/Lower/OpenMP/taskgroup-task-array-reduction.f90
@@ -1,7 +1,7 @@
 ! RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
 ! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
 
-! CHECK-LABEL:  omp.declare_reduction @add_reduction_byref_box_Uxf32 : !fir.ref<!fir.box<!fir.array<?xf32>>> alloc {
+! CHECK-LABEL:  omp.declare_reduction @add_reduction_byref_box_Uxf32 : !fir.ref<!fir.box<!fir.array<?xf32>>> {{.*}} alloc {
 !                 [...]
 ! CHECK:          omp.yield
 ! CHECK-LABEL:  } init {
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90
index 2cd953de0dffa..ed81577ecce16 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90
@@ -32,7 +32,7 @@ program reduce15
   print *,"min: ", mins
 end program
 
-! CHECK-LABEL:   omp.declare_reduction @min_byref_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> alloc {
+! CHECK-LABEL:   omp.declare_reduction @min_byref_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {{.*}} alloc {
 ! CHECK:           %[[VAL_3:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
 ! CHECK:           omp.yield(%[[VAL_3]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
 ! CHECK-LABEL:   } init {
@@ -93,7 +93,7 @@ program reduce15
 ! CHECK:           omp.yield
 ! CHECK:         }
 
-! CHECK-LABEL:   omp.declare_reduction @max_byref_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> alloc {
+! CHECK-LABEL:   omp.declare_reduction @max_byref_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {{.*}} alloc {
 ! CHECK:           %[[VAL_3:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
 ! CHECK:           omp.yield(%[[VAL_3]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
 ! CHECK-LABEL:   } init {
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-allocatable.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-allocatable.f90
index 663851cba46c6..d8c0a36db126e 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-allocatable.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-allocatable.f90
@@ -18,7 +18,7 @@ program reduce
 
 end program
 
-! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_heap_i32 : !fir.ref<!fir.box<!fir.heap<i32>>> alloc {
+! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_heap_i32 : !fir.ref<!fir.box<!fir.heap<i32>>> attributes {byref_element_type = i32} alloc {
 ! CHECK:           %[[VAL_2:.*]] = fir.alloca !fir.box<!fir.heap<i32>>
 ! CHECK:           omp.yield(%[[VAL_2]] : !fir.ref<!fir.box<!fir.heap<i32>>>)
 ! CHECK-LABEL:   } init {
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90
index 209ee9a4e0cef..28acb8f19531f 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90
@@ -22,7 +22,7 @@ subroutine reduce(r)
 end subroutine
 end program
 
-! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_Uxf64 : !fir.ref<!fir.box<!fir.array<?xf64>>> alloc {
+! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_Uxf64 : !fir.ref<!fir.box<!fir.array<?xf64>>> {{.*}} alloc {
 ! CHECK:           %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<?xf64>>
 ! CHECK:           omp.yield(%[[VAL_8]] : !fir.ref<!fir.box<!fir.array<?xf64>>>)
 ! CHECK-LABEL:   } init {
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array-lb.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array-lb.f90
index 2233a74600948..ec448cf20f111 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-array-lb.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-array-lb.f90
@@ -11,7 +11,7 @@ program reduce
   !$omp end parallel do
 end program
 
-! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_2xi32 : !fir.ref<!fir.box<!fir.array<2xi32>>> alloc {
+! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_2xi32 : !fir.ref<!fir.box<!fir.array<2xi32>>> {{.*}} alloc {
 ! CHECK:         } combiner {
 ! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.box<!fir.array<2xi32>>>, %[[ARG1:.*]]: !fir.ref<!fir.box<!fir.array<2xi32>>>):
 ! CHECK:           %[[ARR0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array-lb2.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array-lb2.f90
index 211bde19da8db..9da05a290ec21 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-array-lb2.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-array-lb2.f90
@@ -19,7 +19,7 @@ subroutine sub(a, lb, ub)
 
 end program
 
-! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_Uxi32 : !fir.ref<!fir.box<!fir.array<?xi32>>> alloc {
+! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_Uxi32 : !fir.ref<!fir.box<!fir.array<?xi32>>> {{.*}} alloc {
 ! CHECK:         } combiner {
 ! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.box<!fir.array<?xi32>>>, %[[ARG1:.*]]: !fir.ref<!fir.box<!fir.array<?xi32>>>):
 ! CHECK:           %[[ARR0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array.f90
index afaeba27c5eae..14b657c8e180d 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-array.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-array.f90
@@ -14,7 +14,7 @@ program reduce
 print *,r
 end program
 
-! CHECK-LABEL:  omp.declare_reduction @add_reduction_byref_box_2xi32 : !fir.ref<!fir.box<!fir.array<2xi32>>> alloc {
+! CHECK-LABEL:  omp.declare_reduction @add_reduction_byref_box_2xi32 : !fir.ref<!fir.box<!fir.array<2xi32>>> attributes {byref_element_type = !fir.array<2xi32>} alloc {
 ! CHECK:           %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<2xi32>>
 ! CHECK:           omp.yield(%[[VAL_8]] : !fir.ref<!fir.box<!fir.array<2xi32>>>)
 ! CHECK-LABEL:  } init {
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90
index 25b2e97a1b7f7..d0a0c38e4ccb1 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90
@@ -14,7 +14,7 @@ program reduce
 print *,r
 end program
 
-! CHECK-LABEL:  omp.declare_reduction @add_reduction_byref_box_2xi32 : !fir.ref<!fir.box<!fir.array<2xi32>>> alloc {
+! CHECK-LABEL:  omp.declare_reduction @add_reduction_byref_box_2xi32 : !fir.ref<!fir.box<!fir.array<2xi32>>> {{.*}} alloc {
 ! CHECK:           %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<2xi32>>
 ! CHECK:           omp.yield(%[[VAL_8]] : !fir.ref<!fir.box<!fir.array<2xi32>>>)
 ! CHECK-LABEL:  } init {
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90
index edd2bcb1d6be8..60a162d8f8002 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90
@@ -24,7 +24,7 @@ program main
 
 endprogram
 
-! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_3x3xf64 : !fir.ref<!fir.box<!fir.array<3x3xf64>>> alloc {
+! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_3x3xf64 : !fir.ref<!fir.box<!fir.array<3x3xf64>>> {{.*}} alloc {
 ! CHECK:           %[[VAL_3:.*]] = fir.alloca !fir.box<!fir.array<3x3xf64>>
 ! CHECK:           omp.yield(%[[VAL_3]] : !fir.ref<!fir.box<!fir.array<3x3xf64>>>)
 ! CHECK-LABEL:   } init {
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-pointer.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-pointer.f90
index 27b726376fbeb..f640f5caddf76 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-pointer.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-pointer.f90
@@ -18,7 +18,7 @@ program reduce_pointer
   deallocate(v)
 end program
 
-! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_ptr_i32 : !fir.ref<!fir.box<!fir.ptr<i32>>> alloc {
+! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_ptr_i32 : !fir.ref<!fir.box<!fir.ptr<i32>>> {{.*}} alloc {
 ! CHECK:           %[[VAL_3:.*]] = fir.alloca !fir.box<!fir.ptr<i32>>
 ! CHECK:           omp.yield(%[[VAL_3]] : !fir.ref<!fir.box<!fir.ptr<i32>>>)
 ! CHECK-LABEL:   } init {
diff --git a/flang/test/Lower/do_concurrent_reduce_allocatable.f90 b/flang/test/Lower/do_concurrent_reduce_allocatable.f90
index 873fd10dd1b97..4fb67c094b594 100644
--- a/flang/test/Lower/do_concurrent_reduce_allocatable.f90
+++ b/flang/test/Lower/do_concurrent_reduce_allocatable.f90
@@ -8,7 +8,7 @@ subroutine do_concurrent_allocatable
   end do
 end subroutine
 
-! CHECK: fir.declare_reduction @[[RED_OP:.*]] : ![[RED_TYPE:.*]] alloc {
+! CHECK: fir.declare_reduction @[[RED_OP:.*]] : ![[RED_TYPE:.*]] attributes {byref_element_type = !fir.array<?x?xf32>} alloc {
 ! CHECK:   %[[ALLOC:.*]] = fir.alloca
 ! CHECK:   fir.yield(%[[ALLOC]] : ![[RED_TYPE]])
 ! CHECK: } init {
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 5331cb5abdc6f..f4192f9b49fd9 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1448,11 +1448,15 @@ class OpenMPIRBuilder {
     ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable,
                   EvalKind EvaluationKind, ReductionGenCBTy ReductionGen,
                   ReductionGenClangCBTy ReductionGenClang,
-                  ReductionGenAtomicCBTy AtomicReductionGen)
+                  ReductionGenAtomicCBTy AtomicReductionGen,
+                  Type *ByRefAllocatedType = nullptr,
+                  Type *ByRefElementType = nullptr)
         : ElementType(ElementType), Vari...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/165714


More information about the llvm-commits mailing list