[flang-commits] [flang] [mlir] [flang][OpenMP] Lower task reduction modifier (PR #205124)

Sairudra More via flang-commits flang-commits at lists.llvm.org
Mon Jun 22 09:41:44 PDT 2026


https://github.com/Saieiei updated https://github.com/llvm/llvm-project/pull/205124

>From 5ccce0868fb556e5eea229e46c9717c08c0e22d9 Mon Sep 17 00:00:00 2001
From: Sairudra More <sairudra60 at gmail.com>
Date: Mon, 22 Jun 2026 04:20:04 -0500
Subject: [PATCH] [flang][OpenMP] Lower task reduction modifier

Propagate the OpenMP `task` reduction modifier through Flang lowering and
translate it to the corresponding OpenMP runtime calls in LLVM IR for the
parallel, worksharing-loop, and sections constructs.

The lowering now preserves reduction(mod: task, ...) instead of stopping
at a TODO. The LLVM IR translation emits
__kmpc_taskred_modifier_init after reduction private initialization and
__kmpc_task_reduction_modifier_fini before the final reduction combine.
Unsupported modifier shapes, including by-ref task modifier reductions,
remain gated to TODO diagnostics.

Add lowering and LLVM IR translation tests for the supported constructs.
---
 flang/lib/Lower/OpenMP/ClauseProcessor.cpp    |   9 +-
 .../test/Lower/OpenMP/Todo/reduction-task.f90 |  12 --
 .../Lower/OpenMP/parallel-reduction-task.f90  |  37 ++++
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 156 ++++++++++++++--
 .../openmp-reduction-task-modifier.mlir       | 171 ++++++++++++++++++
 mlir/test/Target/LLVMIR/openmp-todo.mlir      |  28 +++
 6 files changed, 380 insertions(+), 33 deletions(-)
 delete mode 100644 flang/test/Lower/OpenMP/Todo/reduction-task.f90
 create mode 100644 flang/test/Lower/OpenMP/parallel-reduction-task.f90
 create mode 100644 mlir/test/Target/LLVMIR/openmp-reduction-task-modifier.mlir

diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index eb416d103fbe0..4f19dfb98024d 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -2052,12 +2052,9 @@ bool ClauseProcessor::processReduction(
 
         auto mod = std::get<std::optional<ReductionModifier>>(clause.t);
         if (mod.has_value()) {
-          if (mod.value() == ReductionModifier::Task)
-            TODO(currentLocation, "Reduction modifier `task` is not supported");
-          else
-            result.reductionMod = mlir::omp::ReductionModifierAttr::get(
-                converter.getFirOpBuilder().getContext(),
-                translateReductionModifier(mod.value()));
+          result.reductionMod = mlir::omp::ReductionModifierAttr::get(
+              converter.getFirOpBuilder().getContext(),
+              translateReductionModifier(mod.value()));
         }
 
         ReductionProcessor rp;
diff --git a/flang/test/Lower/OpenMP/Todo/reduction-task.f90 b/flang/test/Lower/OpenMP/Todo/reduction-task.f90
deleted file mode 100644
index adc8de00a9b7a..0000000000000
--- a/flang/test/Lower/OpenMP/Todo/reduction-task.f90
+++ /dev/null
@@ -1,12 +0,0 @@
-! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-
-! CHECK: not yet implemented: Reduction modifier `task` is not supported
-subroutine reduction_task()
-  integer :: i
-  i = 0
-
-  !$omp parallel reduction(task, +:i)
-  i = i + 1
-  !$omp end parallel
-end subroutine reduction_task
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-task.f90 b/flang/test/Lower/OpenMP/parallel-reduction-task.f90
new file mode 100644
index 0000000000000..ee46b0044249f
--- /dev/null
+++ b/flang/test/Lower/OpenMP/parallel-reduction-task.f90
@@ -0,0 +1,37 @@
+! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+
+! Check that the `task` reduction modifier is lowered to the `task`
+! reduction modifier attribute on the parallel and worksharing constructs.
+
+! CHECK-LABEL: func.func @_QPreduction_task_parallel
+subroutine reduction_task_parallel()
+  integer :: i
+  i = 0
+  ! CHECK: omp.parallel reduction(mod: task, @{{.*}} %{{.*}} -> %{{.*}} : !fir.ref<i32>) {
+  !$omp parallel reduction(task, +:i)
+  i = i + 1
+  !$omp end parallel
+end subroutine reduction_task_parallel
+
+! CHECK-LABEL: func.func @_QPreduction_task_do
+subroutine reduction_task_do()
+  integer :: i, j
+  i = 0
+  ! CHECK: omp.wsloop {{.*}}reduction(mod: task, @{{.*}} %{{.*}} -> %{{.*}} : !fir.ref<i32>) {
+  !$omp do reduction(task, +:i)
+  do j = 1, 10
+    i = i + 1
+  end do
+  !$omp end do
+end subroutine reduction_task_do
+
+! CHECK-LABEL: func.func @_QPreduction_task_sections
+subroutine reduction_task_sections()
+  integer :: i
+  i = 0
+  ! CHECK: omp.sections {{.*}}reduction(mod: task, @{{.*}} %{{.*}} -> %{{.*}} : !fir.ref<i32>) {
+  !$omp sections reduction(task, +:i)
+  i = i + 1
+  !$omp end sections
+end subroutine reduction_task_sections
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 703f72d1ab5bc..18a234b6720dd 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -392,8 +392,26 @@ static LogicalResult checkImplementationStatus(Operation &op) {
           op.getReductionSyms())
         result = todo("reduction");
     if (op.getReductionMod() &&
-        op.getReductionMod().value() != omp::ReductionModifier::defaultmod)
-      result = todo("reduction with modifier");
+        op.getReductionMod().value() != omp::ReductionModifier::defaultmod) {
+      omp::ReductionModifier mod = op.getReductionMod().value();
+      // The `task` reduction modifier is supported on the parallel and
+      // worksharing (do/for and sections) constructs. Other modifiers, and the
+      // `task` modifier on other constructs, are not yet implemented.
+      bool taskModifierSupported =
+          mod == omp::ReductionModifier::task &&
+          isa<omp::ParallelOp, omp::WsloopOp, omp::SectionsOp>(op);
+      if (!taskModifierSupported) {
+        result = todo("reduction with modifier");
+      } else if (auto byref = op.getReductionByref()) {
+        // The task reduction modifier lowering only handles non-byref
+        // reductions for now.
+        for (bool isByRef : *byref)
+          if (isByRef) {
+            result = todo("task reduction modifier with by-ref reduction");
+            break;
+          }
+      }
+    }
   };
   auto checkTaskReductionByref = [&todo](auto op, LogicalResult &result) {
     if (auto byrefAttr = op.getTaskReductionByref())
@@ -2024,6 +2042,23 @@ static bool constructIsCancellable(Operation *op) {
       .wasInterrupted();
 }
 
+// Forward declarations for the task-reduction helpers defined alongside the
+// omp.taskgroup lowering further down in this file. These are shared by the
+// `reduction(task, ...)` modifier lowering on the parallel/worksharing
+// constructs and by the omp.taskgroup / omp.taskloop.context task_reduction
+// lowering. When \p isModifier is set, `__kmpc_taskred_modifier_init` is
+// emitted (opening a task-reduction scope) instead of `__kmpc_taskred_init`,
+// with \p isWorksharing selecting the runtime `is_ws` argument.
+static llvm::Value *emitTaskReductionInitCall(
+    ArrayRef<omp::DeclareReductionOp> redDecls,
+    ArrayRef<llvm::Value *> origPtrs, StringRef helperNamePrefix,
+    llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy allocaIP,
+    LLVM::ModuleTranslation &moduleTranslation, bool isModifier = false,
+    bool isWorksharing = false);
+static void
+emitTaskReductionModifierFini(bool isWorksharing, llvm::IRBuilderBase &builder,
+                              LLVM::ModuleTranslation &moduleTranslation);
+
 static LogicalResult
 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
                    LLVM::ModuleTranslation &moduleTranslation) {
@@ -2057,6 +2092,9 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
           isByRef)))
     return failure();
 
+  bool isTaskReductionMod =
+      sectionsOp.getReductionMod() == omp::ReductionModifier::task;
+
   SmallVector<StorableBodyGenCallbackTy> sectionCBs;
 
   for (Operation &op : *sectionsOp.getRegion().begin()) {
@@ -2096,6 +2134,19 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
   if (sectionCBs.empty())
     return success();
 
+  // For `reduction(task, ...)` open a task-reduction scope for the worksharing
+  // region. Participating explicit tasks accumulate into the per-thread private
+  // copies, which the worksharing reduction then combines across threads. This
+  // is emitted only after the empty-sections early return above, so it stays
+  // balanced with the matching fini emitted after the sections region.
+  if (isTaskReductionMod &&
+      !emitTaskReductionInitCall(reductionDecls, privateReductionVariables,
+                                 "__omp_taskred_mod_", builder, allocaIP,
+                                 moduleTranslation, /*isModifier=*/true,
+                                 /*isWorksharing=*/true))
+    return sectionsOp.emitError(
+        "failed to emit task reduction modifier initialization");
+
   assert(isa<omp::SectionOp>(*sectionsOp.getRegion().op_begin()));
 
   // TODO: Perform appropriate actions according to the data-sharing
@@ -2125,6 +2176,11 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
 
   builder.restoreIP(*afterIP);
 
+  // Close the task-reduction scope before combining the worksharing copies.
+  if (isTaskReductionMod)
+    emitTaskReductionModifierFini(/*isWorksharing=*/true, builder,
+                                  moduleTranslation);
+
   // Process the reductions if required.
   return createReductionsAndCleanup(
       sectionsOp, builder, moduleTranslation, allocaIP, reductionDecls,
@@ -3484,15 +3540,6 @@ computeTaskloopBounds(omp::LoopNestOp loopOp, llvm::IRBuilderBase &builder,
   return llvm::Error::success();
 }
 
-// Forward declaration: defined alongside the taskgroup task_reduction
-// lowering further down in this file. Shared between omp.taskgroup and
-// omp.taskloop.context translation.
-static llvm::Value *emitTaskReductionInitCall(
-    ArrayRef<omp::DeclareReductionOp> redDecls,
-    ArrayRef<llvm::Value *> origPtrs, StringRef helperNamePrefix,
-    llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy allocaIP,
-    LLVM::ModuleTranslation &moduleTranslation);
-
 // Converts an OpenMP taskloop construct into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpTaskloopContextOp(omp::TaskloopContextOp contextOp,
@@ -4060,8 +4107,11 @@ emitTaskReductionCombFn(omp::DeclareReductionOp decl, StringRef baseName,
 /// \p allocaIP. \p helperNamePrefix is used to disambiguate the generated
 /// init/combiner helper symbol names between taskgroup and taskloop callers.
 ///
-/// Returns the `ptr` value produced by `__kmpc_taskred_init` (the taskgroup
-/// reduction handle), or null on failure.
+/// When \p isModifier is false, emits `__kmpc_taskred_init` and returns the
+/// `ptr` value it produces (the taskgroup reduction handle). When \p isModifier
+/// is true, emits `__kmpc_taskred_modifier_init` instead to open a
+/// task-reduction scope for a parallel or worksharing construct, passing
+/// \p isWorksharing as the runtime `is_ws` argument. Returns null on failure.
 ///
 /// Only the non-byref form is handled here. Byref reductions have already
 /// been rejected by `checkImplementationStatus`.
@@ -4069,7 +4119,8 @@ static llvm::Value *emitTaskReductionInitCall(
     ArrayRef<omp::DeclareReductionOp> redDecls,
     ArrayRef<llvm::Value *> origPtrs, StringRef helperNamePrefix,
     llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy allocaIP,
-    LLVM::ModuleTranslation &moduleTranslation) {
+    LLVM::ModuleTranslation &moduleTranslation, bool isModifier,
+    bool isWorksharing) {
   assert(redDecls.size() == origPtrs.size() &&
          "expected one orig pointer per reduction decl");
   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
@@ -4138,7 +4189,7 @@ static llvm::Value *emitTaskReductionInitCall(
     storeField(6, llvm::ConstantInt::get(i32Ty, 0));      // flags
   }
 
-  // Emit call: __kmpc_taskred_init(gtid, num, &arr).
+  // Emit the runtime call that registers the task reduction data.
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
   uint32_t srcLocSize;
   llvm::Constant *srcLocStr =
@@ -4146,12 +4197,45 @@ static llvm::Value *emitTaskReductionInitCall(
   llvm::Value *ident = ompBuilder->getOrCreateIdent(srcLocStr, srcLocSize);
   ompBuilder->updateToLocation(ompLoc);
   llvm::Value *gtid = ompBuilder->getOrCreateThreadID(ident);
+  if (isModifier) {
+    // __kmpc_taskred_modifier_init(loc, gtid, is_ws, num, &arr) opens a
+    // task-reduction scope for the enclosing parallel/worksharing region.
+    llvm::FunctionCallee modInit = ompBuilder->getOrCreateRuntimeFunction(
+        *llvmModule, llvm::omp::OMPRTL___kmpc_taskred_modifier_init);
+    return builder.CreateCall(modInit,
+                              {ident, gtid,
+                               builder.getInt32(isWorksharing ? 1 : 0),
+                               builder.getInt32(n), arrAlloca},
+                              ".taskred.desc");
+  }
+  // __kmpc_taskred_init(gtid, num, &arr).
   llvm::FunctionCallee taskredInit = ompBuilder->getOrCreateRuntimeFunction(
       *llvmModule, llvm::omp::OMPRTL___kmpc_taskred_init);
   return builder.CreateCall(taskredInit, {gtid, builder.getInt32(n), arrAlloca},
                             ".taskred.desc");
 }
 
+/// Emits `__kmpc_task_reduction_modifier_fini(loc, gtid, is_ws)` at the current
+/// builder insertion point, closing the task-reduction scope opened by the
+/// `task` reduction modifier on a parallel or worksharing construct.
+static void
+emitTaskReductionModifierFini(bool isWorksharing, llvm::IRBuilderBase &builder,
+                              LLVM::ModuleTranslation &moduleTranslation) {
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+  llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
+  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+  uint32_t srcLocSize;
+  llvm::Constant *srcLocStr =
+      ompBuilder->getOrCreateSrcLocStr(ompLoc, srcLocSize);
+  llvm::Value *ident = ompBuilder->getOrCreateIdent(srcLocStr, srcLocSize);
+  ompBuilder->updateToLocation(ompLoc);
+  llvm::Value *gtid = ompBuilder->getOrCreateThreadID(ident);
+  llvm::FunctionCallee fini = ompBuilder->getOrCreateRuntimeFunction(
+      *llvmModule, llvm::omp::OMPRTL___kmpc_task_reduction_modifier_fini);
+  builder.CreateCall(fini,
+                     {ident, gtid, builder.getInt32(isWorksharing ? 1 : 0)});
+}
+
 /// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
@@ -4334,6 +4418,19 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
                                reductionVariableMap, isByRef, deferredStores)))
     return failure();
 
+  // For `reduction(task, ...)` open a task-reduction scope for the worksharing
+  // loop. Participating explicit tasks accumulate into the per-thread private
+  // copies, which the worksharing reduction then combines across threads.
+  bool isTaskReductionMod =
+      wsloopOp.getReductionMod() == omp::ReductionModifier::task;
+  if (isTaskReductionMod &&
+      !emitTaskReductionInitCall(reductionDecls, privateReductionVariables,
+                                 "__omp_taskred_mod_", builder, allocaIP,
+                                 moduleTranslation, /*isModifier=*/true,
+                                 /*isWorksharing=*/true))
+    return wsloopOp.emitError(
+        "failed to emit task reduction modifier initialization");
+
   // TODO: Handle doacross loops when the ordered clause has a parameter.
   bool isOrdered = wsloopOp.getOrdered().has_value();
   std::optional<omp::ScheduleModifier> scheduleMod = wsloopOp.getScheduleMod();
@@ -4443,6 +4540,11 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
   // Set the correct branch target for task cancellation
   popCancelFinalizationCB(cancelTerminators, *ompBuilder, wsloopIP.get());
 
+  // Close the task-reduction scope before the worksharing reduction combine.
+  if (isTaskReductionMod)
+    emitTaskReductionModifierFini(/*isWorksharing=*/true, builder,
+                                  moduleTranslation);
+
   // Process the reductions if required.
   if (failed(createReductionsAndCleanup(
           wsloopOp, builder, moduleTranslation, allocaIP, reductionDecls,
@@ -4475,6 +4577,13 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
   SmallVector<llvm::Value *> privateReductionVariables(
       opInst.getNumReductionVars());
   SmallVector<DeferredStore> deferredStores;
+  // Only open a task-reduction scope when the `task` modifier is present and
+  // there are reduction variables to combine; otherwise the matching fini in
+  // the reduction-combine path (guarded by getNumReductionVars() > 0) would be
+  // skipped, leaving the modifier init unbalanced.
+  bool isTaskReductionMod =
+      opInst.getReductionMod() == omp::ReductionModifier::task &&
+      opInst.getNumReductionVars() > 0;
 
   auto bodyGenCB =
       [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
@@ -4522,6 +4631,17 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
                               reductionVariableMap, isByRef, deferredStores)))
       return llvm::make_error<PreviouslyReportedError>();
 
+    // For `reduction(task, ...)` open a task-reduction scope so participating
+    // explicit tasks accumulate into the per-thread private copies; the
+    // parallel reduction then combines those copies across the team.
+    if (isTaskReductionMod &&
+        !emitTaskReductionInitCall(reductionDecls, privateReductionVariables,
+                                   "__omp_taskred_mod_", builder, allocaIP,
+                                   moduleTranslation, /*isModifier=*/true,
+                                   /*isWorksharing=*/false))
+      return llvm::createStringError(
+          "failed to emit task reduction modifier initialization");
+
     // Save the alloca insertion point on ModuleTranslation stack for use in
     // nested regions.
     LLVM::ModuleTranslation::SaveStack<OpenMPAllocStackFrame> frame(
@@ -4549,6 +4669,12 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
       // Move to region cont block
       builder.SetInsertPoint((*regionBlock)->getTerminator());
 
+      // Close the task-reduction scope before the per-thread reduction
+      // contributions are combined across the team.
+      if (isTaskReductionMod)
+        emitTaskReductionModifierFini(/*isWorksharing=*/false, builder,
+                                      moduleTranslation);
+
       // Generate reductions from info
       llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
       builder.SetInsertPoint(tempTerminator);
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-task-modifier.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-task-modifier.mlir
new file mode 100644
index 0000000000000..aa44315b284f0
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-reduction-task-modifier.mlir
@@ -0,0 +1,171 @@
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+// The `task` reduction modifier opens a task-reduction scope around the
+// parallel / worksharing region. Verify that
+// __kmpc_taskred_modifier_init is emitted (with the correct `is_ws` argument)
+// after the reduction privates are set up, and that
+// __kmpc_task_reduction_modifier_fini is emitted before the reduction combine.
+
+omp.declare_reduction @add_i32 : i32
+init {
+^bb0(%arg0: i32):
+  %c0 = llvm.mlir.constant(0 : i32) : i32
+  omp.yield(%c0 : i32)
+}
+combiner {
+^bb0(%arg0: i32, %arg1: i32):
+  %s = llvm.add %arg0, %arg1 : i32
+  omp.yield(%s : i32)
+}
+
+llvm.func @parallel_task_reduction(%x: !llvm.ptr) {
+  omp.parallel reduction(mod: task, @add_i32 %x -> %prv : !llvm.ptr) {
+    omp.terminator
+  }
+  llvm.return
+}
+
+// CHECK: %kmp_taskred_input_t = type { ptr, ptr, i64, ptr, ptr, ptr, i32 }
+
+// On a parallel construct the modifier init uses is_ws = 0.
+// CHECK-LABEL: define internal void @parallel_task_reduction..omp_par
+// CHECK:         %[[ARR:.+]] = alloca [1 x %kmp_taskred_input_t]
+// CHECK:         call ptr @__kmpc_taskred_modifier_init(ptr @{{.+}}, i32 %{{.+}}, i32 0, i32 1, ptr %[[ARR]])
+// CHECK:         call void @__kmpc_task_reduction_modifier_fini(ptr @{{.+}}, i32 %{{.+}}, i32 0)
+
+// -----
+
+omp.declare_reduction @add_i32 : i32
+init {
+^bb0(%arg0: i32):
+  %c0 = llvm.mlir.constant(0 : i32) : i32
+  omp.yield(%c0 : i32)
+}
+combiner {
+^bb0(%arg0: i32, %arg1: i32):
+  %s = llvm.add %arg0, %arg1 : i32
+  omp.yield(%s : i32)
+}
+
+llvm.func @wsloop_task_reduction(%x: !llvm.ptr) {
+  %lb = llvm.mlir.constant(1 : i32) : i32
+  %ub = llvm.mlir.constant(10 : i32) : i32
+  %step = llvm.mlir.constant(1 : i32) : i32
+  omp.wsloop reduction(mod: task, @add_i32 %x -> %prv : !llvm.ptr) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) inclusive step (%step) {
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// On a worksharing construct the modifier init uses is_ws = 1.
+// CHECK-LABEL: define void @wsloop_task_reduction(
+// CHECK:         %[[ARR:.+]] = alloca [1 x %kmp_taskred_input_t]
+// CHECK:         call ptr @__kmpc_taskred_modifier_init(ptr @{{.+}}, i32 %{{.+}}, i32 1, i32 1, ptr %[[ARR]])
+// CHECK:         call void @__kmpc_task_reduction_modifier_fini(ptr @{{.+}}, i32 %{{.+}}, i32 1)
+
+// -----
+
+omp.declare_reduction @add_i32 : i32
+init {
+^bb0(%arg0: i32):
+  %c0 = llvm.mlir.constant(0 : i32) : i32
+  omp.yield(%c0 : i32)
+}
+combiner {
+^bb0(%arg0: i32, %arg1: i32):
+  %s = llvm.add %arg0, %arg1 : i32
+  omp.yield(%s : i32)
+}
+
+llvm.func @sections_task_reduction(%x: !llvm.ptr) {
+  omp.sections reduction(mod: task, @add_i32 %x -> %prv : !llvm.ptr) {
+    omp.section {
+    ^bb0(%arg: !llvm.ptr):
+      omp.terminator
+    }
+    omp.terminator
+  }
+  llvm.return
+}
+
+// On a worksharing (sections) construct the modifier init uses is_ws = 1.
+// CHECK-LABEL: define void @sections_task_reduction(
+// CHECK:         %[[ARR:.+]] = alloca [1 x %kmp_taskred_input_t]
+// CHECK:         call ptr @__kmpc_taskred_modifier_init(ptr @{{.+}}, i32 %{{.+}}, i32 1, i32 1, ptr %[[ARR]])
+// CHECK:         call void @__kmpc_task_reduction_modifier_fini(ptr @{{.+}}, i32 %{{.+}}, i32 1)
+
+// -----
+
+omp.declare_reduction @add_i32 : i32
+init {
+^bb0(%arg0: i32):
+  %c0 = llvm.mlir.constant(0 : i32) : i32
+  omp.yield(%c0 : i32)
+}
+combiner {
+^bb0(%arg0: i32, %arg1: i32):
+  %s = llvm.add %arg0, %arg1 : i32
+  omp.yield(%s : i32)
+}
+
+llvm.func @parallel_two_task_reductions(%x: !llvm.ptr, %y: !llvm.ptr) {
+  omp.parallel reduction(mod: task, @add_i32 %x -> %p0, @add_i32 %y -> %p1 : !llvm.ptr, !llvm.ptr) {
+    omp.terminator
+  }
+  llvm.return
+}
+
+// With two task-modifier reductions the descriptor array holds two entries and
+// the modifier init receives num = 2 (is_ws = 0 on the parallel construct).
+// CHECK-LABEL: define internal void @parallel_two_task_reductions..omp_par
+// CHECK:         %[[ARR:.+]] = alloca [2 x %kmp_taskred_input_t]
+// CHECK:         call ptr @__kmpc_taskred_modifier_init(ptr @{{.+}}, i32 %{{.+}}, i32 0, i32 2, ptr %[[ARR]])
+
+// -----
+
+// An empty omp.sections (only a terminator, no omp.section) hits the
+// empty-sections early return, so no task-reduction scope is opened: neither
+// the modifier init nor the matching fini may be emitted.
+
+omp.declare_reduction @add_i32 : i32
+init {
+^bb0(%arg0: i32):
+  %c0 = llvm.mlir.constant(0 : i32) : i32
+  omp.yield(%c0 : i32)
+}
+combiner {
+^bb0(%arg0: i32, %arg1: i32):
+  %s = llvm.add %arg0, %arg1 : i32
+  omp.yield(%s : i32)
+}
+
+llvm.func @empty_sections_task_reduction(%x: !llvm.ptr) {
+  omp.sections reduction(mod: task, @add_i32 %x -> %prv : !llvm.ptr) {
+    omp.terminator
+  }
+  llvm.return
+}
+
+// CHECK-LABEL: define void @empty_sections_task_reduction(
+// CHECK-NOT:     @__kmpc_taskred_modifier_init
+// CHECK-NOT:     @__kmpc_task_reduction_modifier_fini
+// CHECK:         ret void
+
+// -----
+
+// A verifier-valid omp.parallel that carries reduction_mod = task but has no
+// reduction variables must not open a task-reduction scope.
+
+llvm.func @parallel_task_mod_no_reductions() {
+  "omp.parallel"() <{operandSegmentSizes = array<i32: 0, 0, 0, 0, 0, 0>, reduction_mod = #omp<reduction_modifier(task)>}> ({
+    omp.terminator
+  }) : () -> ()
+  llvm.return
+}
+
+// CHECK-LABEL: define internal void @parallel_task_mod_no_reductions..omp_par
+// CHECK-NOT:     @__kmpc_taskred_modifier_init
+// CHECK-NOT:     @__kmpc_task_reduction_modifier_fini
+// CHECK:         ret void
diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index 377a5bb799be4..4d23fcafc80bd 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -134,6 +134,34 @@ llvm.func @scan_reduction(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
 
 // -----
 
+omp.declare_reduction @add_f32 : f32
+init {
+^bb0(%arg: f32):
+  %0 = llvm.mlir.constant(0.0 : f32) : f32
+  omp.yield (%0 : f32)
+}
+combiner {
+^bb1(%arg0: f32, %arg1: f32):
+  %1 = llvm.fadd %arg0, %arg1 : f32
+  omp.yield (%1 : f32)
+}
+atomic {
+^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
+  %2 = llvm.load %arg3 : !llvm.ptr -> f32
+  llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
+  omp.yield
+}
+llvm.func @parallel_task_reduction_modifier_byref(%x : !llvm.ptr) {
+  // expected-error @below {{not yet implemented: Unhandled clause task reduction modifier with by-ref reduction in omp.parallel operation}}
+  // expected-error @below {{LLVM Translation failed for operation: omp.parallel}}
+  omp.parallel reduction(mod: task, byref @add_f32 %x -> %prv : !llvm.ptr) {
+    omp.terminator
+  }
+  llvm.return
+}
+
+// -----
+
 llvm.func @single_allocate(%x : !llvm.ptr) {
  // expected-error @below {{not yet implemented: Unhandled clause allocate in omp.single operation}}
  // expected-error @below {{LLVM Translation failed for operation: omp.single}}



More information about the flang-commits mailing list