[flang-commits] [flang] [mlir] [flang][OpenMP] Lower task reduction modifier (PR #205124)
Sairudra More via flang-commits
flang-commits at lists.llvm.org
Tue Jun 23 09:32:51 PDT 2026
https://github.com/Saieiei updated https://github.com/llvm/llvm-project/pull/205124
>From fa47675580f912a86bca7fe6e171f96941e5e8df Mon Sep 17 00:00:00 2001
From: Sairudra More <sairudra60 at gmail.com>
Date: Mon, 22 Jun 2026 04:20:04 -0500
Subject: [PATCH] [flang][OpenMP] Lower task reduction modifier
Propagate the OpenMP reduction task modifier through Flang lowering and
translate it to the LLVM IR runtime calls for parallel, worksharing-loop,
and sections constructs.
The lowering now preserves reduction(mod: task, ...) instead of stopping
at a TODO. The LLVM IR translation emits
__kmpc_taskred_modifier_init after reduction private initialization and
__kmpc_task_reduction_modifier_fini before the final reduction combine.
Unsupported cases (other reduction modifiers, the task modifier on other
constructs, and by-ref task modifier reductions) still emit TODO diagnostics.
Add lowering and LLVM IR translation tests for the supported constructs.
---
flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 9 +-
.../test/Lower/OpenMP/Todo/reduction-task.f90 | 12 -
.../Lower/OpenMP/parallel-reduction-task.f90 | 37 +++
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 158 +++++++++++--
.../openmp-reduction-task-modifier.mlir | 216 ++++++++++++++++++
mlir/test/Target/LLVMIR/openmp-todo.mlir | 28 +++
6 files changed, 427 insertions(+), 33 deletions(-)
delete mode 100644 flang/test/Lower/OpenMP/Todo/reduction-task.f90
create mode 100644 flang/test/Lower/OpenMP/parallel-reduction-task.f90
create mode 100644 mlir/test/Target/LLVMIR/openmp-reduction-task-modifier.mlir
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index eb416d103fbe0..4f19dfb98024d 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -2052,12 +2052,9 @@ bool ClauseProcessor::processReduction(
auto mod = std::get<std::optional<ReductionModifier>>(clause.t);
if (mod.has_value()) {
- if (mod.value() == ReductionModifier::Task)
- TODO(currentLocation, "Reduction modifier `task` is not supported");
- else
- result.reductionMod = mlir::omp::ReductionModifierAttr::get(
- converter.getFirOpBuilder().getContext(),
- translateReductionModifier(mod.value()));
+ result.reductionMod = mlir::omp::ReductionModifierAttr::get(
+ converter.getFirOpBuilder().getContext(),
+ translateReductionModifier(mod.value()));
}
ReductionProcessor rp;
diff --git a/flang/test/Lower/OpenMP/Todo/reduction-task.f90 b/flang/test/Lower/OpenMP/Todo/reduction-task.f90
deleted file mode 100644
index adc8de00a9b7a..0000000000000
--- a/flang/test/Lower/OpenMP/Todo/reduction-task.f90
+++ /dev/null
@@ -1,12 +0,0 @@
-! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-
-! CHECK: not yet implemented: Reduction modifier `task` is not supported
-subroutine reduction_task()
- integer :: i
- i = 0
-
- !$omp parallel reduction(task, +:i)
- i = i + 1
- !$omp end parallel
-end subroutine reduction_task
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-task.f90 b/flang/test/Lower/OpenMP/parallel-reduction-task.f90
new file mode 100644
index 0000000000000..ee46b0044249f
--- /dev/null
+++ b/flang/test/Lower/OpenMP/parallel-reduction-task.f90
@@ -0,0 +1,37 @@
+! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+
+! Check that the `task` reduction modifier is lowered to the `task`
+! reduction modifier attribute on the parallel and worksharing constructs.
+
+! CHECK-LABEL: func.func @_QPreduction_task_parallel
+subroutine reduction_task_parallel()
+ integer :: i
+ i = 0
+ ! CHECK: omp.parallel reduction(mod: task, @{{.*}} %{{.*}} -> %{{.*}} : !fir.ref<i32>) {
+ !$omp parallel reduction(task, +:i)
+ i = i + 1
+ !$omp end parallel
+end subroutine reduction_task_parallel
+
+! CHECK-LABEL: func.func @_QPreduction_task_do
+subroutine reduction_task_do()
+ integer :: i, j
+ i = 0
+ ! CHECK: omp.wsloop {{.*}}reduction(mod: task, @{{.*}} %{{.*}} -> %{{.*}} : !fir.ref<i32>) {
+ !$omp do reduction(task, +:i)
+ do j = 1, 10
+ i = i + 1
+ end do
+ !$omp end do
+end subroutine reduction_task_do
+
+! CHECK-LABEL: func.func @_QPreduction_task_sections
+subroutine reduction_task_sections()
+ integer :: i
+ i = 0
+ ! CHECK: omp.sections {{.*}}reduction(mod: task, @{{.*}} %{{.*}} -> %{{.*}} : !fir.ref<i32>) {
+ !$omp sections reduction(task, +:i)
+ i = i + 1
+ !$omp end sections
+end subroutine reduction_task_sections
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 703f72d1ab5bc..bebfbf7790003 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -392,8 +392,26 @@ static LogicalResult checkImplementationStatus(Operation &op) {
op.getReductionSyms())
result = todo("reduction");
if (op.getReductionMod() &&
- op.getReductionMod().value() != omp::ReductionModifier::defaultmod)
- result = todo("reduction with modifier");
+ op.getReductionMod().value() != omp::ReductionModifier::defaultmod) {
+ omp::ReductionModifier mod = op.getReductionMod().value();
+ // The `task` reduction modifier is supported on the parallel and
+ // worksharing (do/for and sections) constructs. Other modifiers, and the
+ // `task` modifier on other constructs, are not yet implemented.
+ bool taskModifierSupported =
+ mod == omp::ReductionModifier::task &&
+ isa<omp::ParallelOp, omp::WsloopOp, omp::SectionsOp>(op);
+ if (!taskModifierSupported) {
+ result = todo("reduction with modifier");
+ } else if (auto byref = op.getReductionByref()) {
+ // The task reduction modifier lowering only handles non-byref
+ // reductions for now.
+ for (bool isByRef : *byref)
+ if (isByRef) {
+ result = todo("task reduction modifier with by-ref reduction");
+ break;
+ }
+ }
+ }
};
auto checkTaskReductionByref = [&todo](auto op, LogicalResult &result) {
if (auto byrefAttr = op.getTaskReductionByref())
@@ -2024,6 +2042,23 @@ static bool constructIsCancellable(Operation *op) {
.wasInterrupted();
}
+// Forward declarations for the task-reduction helpers defined alongside the
+// omp.taskgroup lowering further down in this file. These are shared by the
+// `reduction(task, ...)` modifier lowering on the parallel/worksharing
+// constructs and by the omp.taskgroup / omp.taskloop.context task_reduction
+// lowering. When \p isModifier is set, `__kmpc_taskred_modifier_init` is
+// emitted (opening a task-reduction scope) instead of `__kmpc_taskred_init`,
+// with \p isWorksharing selecting the runtime `is_ws` argument.
+static llvm::Value *emitTaskReductionInitCall(
+ ArrayRef<omp::DeclareReductionOp> redDecls,
+ ArrayRef<llvm::Value *> origPtrs, StringRef helperNamePrefix,
+ llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy allocaIP,
+ LLVM::ModuleTranslation &moduleTranslation, bool isModifier = false,
+ bool isWorksharing = false);
+static void
+emitTaskReductionModifierFini(bool isWorksharing, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation);
+
static LogicalResult
convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {
@@ -2057,6 +2092,10 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
isByRef)))
return failure();
+ bool isTaskReductionMod =
+ sectionsOp.getReductionMod() == omp::ReductionModifier::task &&
+ sectionsOp.getNumReductionVars() > 0;
+
SmallVector<StorableBodyGenCallbackTy> sectionCBs;
for (Operation &op : *sectionsOp.getRegion().begin()) {
@@ -2096,6 +2135,19 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
if (sectionCBs.empty())
return success();
+ // For `reduction(task, ...)` open a task-reduction scope for the worksharing
+ // region. Participating explicit tasks accumulate into the per-thread private
+ // copies, which the worksharing reduction then combines across threads. This
+ // is emitted only after the empty-sections early return above, so it stays
+ // balanced with the matching fini emitted after the sections region.
+ if (isTaskReductionMod &&
+ !emitTaskReductionInitCall(reductionDecls, privateReductionVariables,
+ "__omp_taskred_mod_", builder, allocaIP,
+ moduleTranslation, /*isModifier=*/true,
+ /*isWorksharing=*/true))
+ return sectionsOp.emitError(
+ "failed to emit task reduction modifier initialization");
+
assert(isa<omp::SectionOp>(*sectionsOp.getRegion().op_begin()));
// TODO: Perform appropriate actions according to the data-sharing
@@ -2125,6 +2177,11 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
builder.restoreIP(*afterIP);
+ // Close the task-reduction scope before combining the worksharing copies.
+ if (isTaskReductionMod)
+ emitTaskReductionModifierFini(/*isWorksharing=*/true, builder,
+ moduleTranslation);
+
// Process the reductions if required.
return createReductionsAndCleanup(
sectionsOp, builder, moduleTranslation, allocaIP, reductionDecls,
@@ -3484,15 +3541,6 @@ computeTaskloopBounds(omp::LoopNestOp loopOp, llvm::IRBuilderBase &builder,
return llvm::Error::success();
}
-// Forward declaration: defined alongside the taskgroup task_reduction
-// lowering further down in this file. Shared between omp.taskgroup and
-// omp.taskloop.context translation.
-static llvm::Value *emitTaskReductionInitCall(
- ArrayRef<omp::DeclareReductionOp> redDecls,
- ArrayRef<llvm::Value *> origPtrs, StringRef helperNamePrefix,
- llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy allocaIP,
- LLVM::ModuleTranslation &moduleTranslation);
-
// Converts an OpenMP taskloop construct into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpTaskloopContextOp(omp::TaskloopContextOp contextOp,
@@ -4060,8 +4108,11 @@ emitTaskReductionCombFn(omp::DeclareReductionOp decl, StringRef baseName,
/// \p allocaIP. \p helperNamePrefix is used to disambiguate the generated
/// init/combiner helper symbol names between taskgroup and taskloop callers.
///
-/// Returns the `ptr` value produced by `__kmpc_taskred_init` (the taskgroup
-/// reduction handle), or null on failure.
+/// When \p isModifier is false, emits `__kmpc_taskred_init` and returns the
+/// `ptr` value it produces (the taskgroup reduction handle). When \p isModifier
+/// is true, emits `__kmpc_taskred_modifier_init` instead to open a
+/// task-reduction scope for a parallel or worksharing construct, passing
+/// \p isWorksharing as the runtime `is_ws` argument. Returns null on failure.
///
/// Only the non-byref form is handled here. Byref reductions have already
/// been rejected by `checkImplementationStatus`.
@@ -4069,7 +4120,8 @@ static llvm::Value *emitTaskReductionInitCall(
ArrayRef<omp::DeclareReductionOp> redDecls,
ArrayRef<llvm::Value *> origPtrs, StringRef helperNamePrefix,
llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy allocaIP,
- LLVM::ModuleTranslation &moduleTranslation) {
+ LLVM::ModuleTranslation &moduleTranslation, bool isModifier,
+ bool isWorksharing) {
assert(redDecls.size() == origPtrs.size() &&
"expected one orig pointer per reduction decl");
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
@@ -4138,7 +4190,7 @@ static llvm::Value *emitTaskReductionInitCall(
storeField(6, llvm::ConstantInt::get(i32Ty, 0)); // flags
}
- // Emit call: __kmpc_taskred_init(gtid, num, &arr).
+ // Emit the runtime call that registers the task reduction data.
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
uint32_t srcLocSize;
llvm::Constant *srcLocStr =
@@ -4146,12 +4198,45 @@ static llvm::Value *emitTaskReductionInitCall(
llvm::Value *ident = ompBuilder->getOrCreateIdent(srcLocStr, srcLocSize);
ompBuilder->updateToLocation(ompLoc);
llvm::Value *gtid = ompBuilder->getOrCreateThreadID(ident);
+ if (isModifier) {
+ // __kmpc_taskred_modifier_init(loc, gtid, is_ws, num, &arr) opens a
+ // task-reduction scope for the enclosing parallel/worksharing region.
+ llvm::FunctionCallee modInit = ompBuilder->getOrCreateRuntimeFunction(
+ *llvmModule, llvm::omp::OMPRTL___kmpc_taskred_modifier_init);
+ return builder.CreateCall(modInit,
+ {ident, gtid,
+ builder.getInt32(isWorksharing ? 1 : 0),
+ builder.getInt32(n), arrAlloca},
+ ".taskred.desc");
+ }
+ // __kmpc_taskred_init(gtid, num, &arr).
llvm::FunctionCallee taskredInit = ompBuilder->getOrCreateRuntimeFunction(
*llvmModule, llvm::omp::OMPRTL___kmpc_taskred_init);
return builder.CreateCall(taskredInit, {gtid, builder.getInt32(n), arrAlloca},
".taskred.desc");
}
+/// Emits `__kmpc_task_reduction_modifier_fini(loc, gtid, is_ws)` at the current
+/// builder insertion point, closing the task-reduction scope opened by the
+/// `task` reduction modifier on a parallel or worksharing construct.
+static void
+emitTaskReductionModifierFini(bool isWorksharing, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) {
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+ llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
+ llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+ uint32_t srcLocSize;
+ llvm::Constant *srcLocStr =
+ ompBuilder->getOrCreateSrcLocStr(ompLoc, srcLocSize);
+ llvm::Value *ident = ompBuilder->getOrCreateIdent(srcLocStr, srcLocSize);
+ ompBuilder->updateToLocation(ompLoc);
+ llvm::Value *gtid = ompBuilder->getOrCreateThreadID(ident);
+ llvm::FunctionCallee fini = ompBuilder->getOrCreateRuntimeFunction(
+ *llvmModule, llvm::omp::OMPRTL___kmpc_task_reduction_modifier_fini);
+ builder.CreateCall(fini,
+ {ident, gtid, builder.getInt32(isWorksharing ? 1 : 0)});
+}
+
/// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
@@ -4334,6 +4419,20 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
reductionVariableMap, isByRef, deferredStores)))
return failure();
+ // For `reduction(task, ...)` open a task-reduction scope for the worksharing
+ // loop. Participating explicit tasks accumulate into the per-thread private
+ // copies, which the worksharing reduction then combines across threads.
+ bool isTaskReductionMod =
+ wsloopOp.getReductionMod() == omp::ReductionModifier::task &&
+ wsloopOp.getNumReductionVars() > 0;
+ if (isTaskReductionMod &&
+ !emitTaskReductionInitCall(reductionDecls, privateReductionVariables,
+ "__omp_taskred_mod_", builder, allocaIP,
+ moduleTranslation, /*isModifier=*/true,
+ /*isWorksharing=*/true))
+ return wsloopOp.emitError(
+ "failed to emit task reduction modifier initialization");
+
// TODO: Handle doacross loops when the ordered clause has a parameter.
bool isOrdered = wsloopOp.getOrdered().has_value();
std::optional<omp::ScheduleModifier> scheduleMod = wsloopOp.getScheduleMod();
@@ -4443,6 +4542,11 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
// Set the correct branch target for task cancellation
popCancelFinalizationCB(cancelTerminators, *ompBuilder, wsloopIP.get());
+ // Close the task-reduction scope before the worksharing reduction combine.
+ if (isTaskReductionMod)
+ emitTaskReductionModifierFini(/*isWorksharing=*/true, builder,
+ moduleTranslation);
+
// Process the reductions if required.
if (failed(createReductionsAndCleanup(
wsloopOp, builder, moduleTranslation, allocaIP, reductionDecls,
@@ -4475,6 +4579,13 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
SmallVector<llvm::Value *> privateReductionVariables(
opInst.getNumReductionVars());
SmallVector<DeferredStore> deferredStores;
+ // Only open a task-reduction scope when the `task` modifier is present and
+ // there are reduction variables to combine; otherwise the matching fini in
+ // the reduction-combine path (guarded by getNumReductionVars() > 0) would be
+ // skipped, leaving the modifier init unbalanced.
+ bool isTaskReductionMod =
+ opInst.getReductionMod() == omp::ReductionModifier::task &&
+ opInst.getNumReductionVars() > 0;
auto bodyGenCB =
[&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
@@ -4522,6 +4633,17 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
reductionVariableMap, isByRef, deferredStores)))
return llvm::make_error<PreviouslyReportedError>();
+ // For `reduction(task, ...)` open a task-reduction scope so participating
+ // explicit tasks accumulate into the per-thread private copies; the
+ // parallel reduction then combines those copies across the team.
+ if (isTaskReductionMod &&
+ !emitTaskReductionInitCall(reductionDecls, privateReductionVariables,
+ "__omp_taskred_mod_", builder, allocaIP,
+ moduleTranslation, /*isModifier=*/true,
+ /*isWorksharing=*/false))
+ return llvm::createStringError(
+ "failed to emit task reduction modifier initialization");
+
// Save the alloca insertion point on ModuleTranslation stack for use in
// nested regions.
LLVM::ModuleTranslation::SaveStack<OpenMPAllocStackFrame> frame(
@@ -4549,6 +4671,12 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
// Move to region cont block
builder.SetInsertPoint((*regionBlock)->getTerminator());
+ // Close the task-reduction scope before the per-thread reduction
+ // contributions are combined across the team.
+ if (isTaskReductionMod)
+ emitTaskReductionModifierFini(/*isWorksharing=*/false, builder,
+ moduleTranslation);
+
// Generate reductions from info
llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
builder.SetInsertPoint(tempTerminator);
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-task-modifier.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-task-modifier.mlir
new file mode 100644
index 0000000000000..a15b1ee701a4e
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-reduction-task-modifier.mlir
@@ -0,0 +1,216 @@
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+// The `task` reduction modifier opens a task-reduction scope around the
+// parallel / worksharing region. Verify that
+// __kmpc_taskred_modifier_init is emitted (with the correct `is_ws` argument)
+// after the reduction privates are set up, and that
+// __kmpc_task_reduction_modifier_fini is emitted before the reduction combine.
+
+omp.declare_reduction @add_i32 : i32
+init {
+^bb0(%arg0: i32):
+ %c0 = llvm.mlir.constant(0 : i32) : i32
+ omp.yield(%c0 : i32)
+}
+combiner {
+^bb0(%arg0: i32, %arg1: i32):
+ %s = llvm.add %arg0, %arg1 : i32
+ omp.yield(%s : i32)
+}
+
+llvm.func @parallel_task_reduction(%x: !llvm.ptr) {
+ omp.parallel reduction(mod: task, @add_i32 %x -> %prv : !llvm.ptr) {
+ omp.terminator
+ }
+ llvm.return
+}
+
+// CHECK: %kmp_taskred_input_t = type { ptr, ptr, i64, ptr, ptr, ptr, i32 }
+
+// On a parallel construct the modifier init uses is_ws = 0.
+// CHECK-LABEL: define internal void @parallel_task_reduction..omp_par
+// CHECK: %[[ARR:.+]] = alloca [1 x %kmp_taskred_input_t]
+// CHECK: call ptr @__kmpc_taskred_modifier_init(ptr @{{.+}}, i32 %{{.+}}, i32 0, i32 1, ptr %[[ARR]])
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(ptr @{{.+}}, i32 %{{.+}}, i32 0)
+
+// -----
+
+omp.declare_reduction @add_i32 : i32
+init {
+^bb0(%arg0: i32):
+ %c0 = llvm.mlir.constant(0 : i32) : i32
+ omp.yield(%c0 : i32)
+}
+combiner {
+^bb0(%arg0: i32, %arg1: i32):
+ %s = llvm.add %arg0, %arg1 : i32
+ omp.yield(%s : i32)
+}
+
+llvm.func @wsloop_task_reduction(%x: !llvm.ptr) {
+ %lb = llvm.mlir.constant(1 : i32) : i32
+ %ub = llvm.mlir.constant(10 : i32) : i32
+ %step = llvm.mlir.constant(1 : i32) : i32
+ omp.wsloop reduction(mod: task, @add_i32 %x -> %prv : !llvm.ptr) {
+ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) inclusive step (%step) {
+ omp.yield
+ }
+ }
+ llvm.return
+}
+
+// On a worksharing construct the modifier init uses is_ws = 1.
+// CHECK-LABEL: define void @wsloop_task_reduction(
+// CHECK: %[[ARR:.+]] = alloca [1 x %kmp_taskred_input_t]
+// CHECK: call ptr @__kmpc_taskred_modifier_init(ptr @{{.+}}, i32 %{{.+}}, i32 1, i32 1, ptr %[[ARR]])
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(ptr @{{.+}}, i32 %{{.+}}, i32 1)
+
+// -----
+
+omp.declare_reduction @add_i32 : i32
+init {
+^bb0(%arg0: i32):
+ %c0 = llvm.mlir.constant(0 : i32) : i32
+ omp.yield(%c0 : i32)
+}
+combiner {
+^bb0(%arg0: i32, %arg1: i32):
+ %s = llvm.add %arg0, %arg1 : i32
+ omp.yield(%s : i32)
+}
+
+llvm.func @sections_task_reduction(%x: !llvm.ptr) {
+ omp.sections reduction(mod: task, @add_i32 %x -> %prv : !llvm.ptr) {
+ omp.section {
+ ^bb0(%arg: !llvm.ptr):
+ omp.terminator
+ }
+ omp.terminator
+ }
+ llvm.return
+}
+
+// On a worksharing (sections) construct the modifier init uses is_ws = 1.
+// CHECK-LABEL: define void @sections_task_reduction(
+// CHECK: %[[ARR:.+]] = alloca [1 x %kmp_taskred_input_t]
+// CHECK: call ptr @__kmpc_taskred_modifier_init(ptr @{{.+}}, i32 %{{.+}}, i32 1, i32 1, ptr %[[ARR]])
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(ptr @{{.+}}, i32 %{{.+}}, i32 1)
+
+// -----
+
+omp.declare_reduction @add_i32 : i32
+init {
+^bb0(%arg0: i32):
+ %c0 = llvm.mlir.constant(0 : i32) : i32
+ omp.yield(%c0 : i32)
+}
+combiner {
+^bb0(%arg0: i32, %arg1: i32):
+ %s = llvm.add %arg0, %arg1 : i32
+ omp.yield(%s : i32)
+}
+
+llvm.func @parallel_two_task_reductions(%x: !llvm.ptr, %y: !llvm.ptr) {
+ omp.parallel reduction(mod: task, @add_i32 %x -> %p0, @add_i32 %y -> %p1 : !llvm.ptr, !llvm.ptr) {
+ omp.terminator
+ }
+ llvm.return
+}
+
+// With two task-modifier reductions the descriptor array holds two entries and
+// the modifier init receives num = 2 (is_ws = 0 on the parallel construct).
+// CHECK-LABEL: define internal void @parallel_two_task_reductions..omp_par
+// CHECK: %[[ARR:.+]] = alloca [2 x %kmp_taskred_input_t]
+// CHECK: call ptr @__kmpc_taskred_modifier_init(ptr @{{.+}}, i32 %{{.+}}, i32 0, i32 2, ptr %[[ARR]])
+
+// -----
+
+// An empty omp.sections (only a terminator, no omp.section) hits the
+// empty-sections early return, so no task-reduction scope is opened: neither
+// the modifier init nor the matching fini may be emitted.
+
+omp.declare_reduction @add_i32 : i32
+init {
+^bb0(%arg0: i32):
+ %c0 = llvm.mlir.constant(0 : i32) : i32
+ omp.yield(%c0 : i32)
+}
+combiner {
+^bb0(%arg0: i32, %arg1: i32):
+ %s = llvm.add %arg0, %arg1 : i32
+ omp.yield(%s : i32)
+}
+
+llvm.func @empty_sections_task_reduction(%x: !llvm.ptr) {
+ omp.sections reduction(mod: task, @add_i32 %x -> %prv : !llvm.ptr) {
+ omp.terminator
+ }
+ llvm.return
+}
+
+// CHECK-LABEL: define void @empty_sections_task_reduction(
+// CHECK-NOT: @__kmpc_taskred_modifier_init
+// CHECK-NOT: @__kmpc_task_reduction_modifier_fini
+// CHECK: ret void
+
+// -----
+
+// A verifier-valid omp.parallel that carries reduction_mod = task but has no
+// reduction variables must not open a task-reduction scope.
+
+llvm.func @parallel_task_mod_no_reductions() {
+ "omp.parallel"() <{operandSegmentSizes = array<i32: 0, 0, 0, 0, 0, 0>, reduction_mod = #omp<reduction_modifier(task)>}> ({
+ omp.terminator
+ }) : () -> ()
+ llvm.return
+}
+
+// CHECK-LABEL: define internal void @parallel_task_mod_no_reductions..omp_par
+// CHECK-NOT: @__kmpc_taskred_modifier_init
+// CHECK-NOT: @__kmpc_task_reduction_modifier_fini
+// CHECK: ret void
+
+// -----
+
+// A verifier-valid omp.wsloop that carries reduction_mod = task but has no
+// reduction variables must not open a task-reduction scope.
+
+llvm.func @wsloop_task_mod_no_reductions() {
+ %lb = llvm.mlir.constant(1 : i32) : i32
+ %ub = llvm.mlir.constant(10 : i32) : i32
+ %step = llvm.mlir.constant(1 : i32) : i32
+ "omp.wsloop"() <{operandSegmentSizes = array<i32: 0, 0, 0, 0, 0, 0, 0>, reduction_mod = #omp<reduction_modifier(task)>}> ({
+ "omp.loop_nest"(%lb, %ub, %step) <{loop_inclusive}> ({
+ ^bb0(%iv: i32):
+ "omp.yield"() : () -> ()
+ }) : (i32, i32, i32) -> ()
+ }) : () -> ()
+ llvm.return
+}
+
+// CHECK-LABEL: define void @wsloop_task_mod_no_reductions(
+// CHECK-NOT: @__kmpc_taskred_modifier_init
+// CHECK-NOT: @__kmpc_task_reduction_modifier_fini
+// CHECK: ret void
+
+// -----
+
+// A verifier-valid omp.sections that carries reduction_mod = task but has no
+// reduction variables must not open a task-reduction scope. A section body is
+// present, so this exercises the reduction-count guard rather than the
+// empty-sections early return tested above.
+
+llvm.func @sections_task_mod_no_reductions() {
+ "omp.sections"() <{operandSegmentSizes = array<i32: 0, 0, 0, 0>, reduction_mod = #omp<reduction_modifier(task)>}> ({
+ "omp.section"() ({
+ "omp.terminator"() : () -> ()
+ }) : () -> ()
+ "omp.terminator"() : () -> ()
+ }) : () -> ()
+ llvm.return
+}
+
+// CHECK-LABEL: define void @sections_task_mod_no_reductions(
+// CHECK-NOT: @__kmpc_taskred_modifier_init
+// CHECK-NOT: @__kmpc_task_reduction_modifier_fini
+// CHECK: ret void
diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index 377a5bb799be4..4d23fcafc80bd 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -134,6 +134,34 @@ llvm.func @scan_reduction(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
// -----
+omp.declare_reduction @add_f32 : f32
+init {
+^bb0(%arg: f32):
+ %0 = llvm.mlir.constant(0.0 : f32) : f32
+ omp.yield (%0 : f32)
+}
+combiner {
+^bb1(%arg0: f32, %arg1: f32):
+ %1 = llvm.fadd %arg0, %arg1 : f32
+ omp.yield (%1 : f32)
+}
+atomic {
+^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
+ %2 = llvm.load %arg3 : !llvm.ptr -> f32
+ llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
+ omp.yield
+}
+llvm.func @parallel_task_reduction_modifier_byref(%x : !llvm.ptr) {
+ // expected-error@below {{not yet implemented: Unhandled clause task reduction modifier with by-ref reduction in omp.parallel operation}}
+ // expected-error@below {{LLVM Translation failed for operation: omp.parallel}}
+ omp.parallel reduction(mod: task, byref @add_f32 %x -> %prv : !llvm.ptr) {
+ omp.terminator
+ }
+ llvm.return
+}
+
+// -----
+
llvm.func @single_allocate(%x : !llvm.ptr) {
// expected-error@below {{not yet implemented: Unhandled clause allocate in omp.single operation}}
// expected-error@below {{LLVM Translation failed for operation: omp.single}}
More information about the flang-commits
mailing list