[flang-commits] [flang] [OpenMP][flang] Move `todo` for checking reduction support status on the GPU (PR #175172)
Kareem Ergawy via flang-commits
flang-commits at lists.llvm.org
Wed Jan 21 03:58:15 PST 2026
https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/175172
>From 9fc2c6a50c8037e8bd160b8817cb165918054f88 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Fri, 9 Jan 2026 08:07:19 -0600
Subject: [PATCH 1/3] [OpenMP][flang] Move `todo` for checking reduction
support status on the GPU
Moves the `todo` that checks the current level of support for by-ref
reductions into the `FunctionFiltering` pass. This guarantees that the
check does not trigger when the same module is compiled twice: on the
CPU and on the GPU.
---
.../lib/Lower/Support/ReductionProcessor.cpp | 20 ----------------
.../Optimizer/OpenMP/FunctionFiltering.cpp | 24 +++++++++++++++++++
2 files changed, 24 insertions(+), 20 deletions(-)
diff --git a/flang/lib/Lower/Support/ReductionProcessor.cpp b/flang/lib/Lower/Support/ReductionProcessor.cpp
index db8ad909b1d2f..0e01268dd74ff 100644
--- a/flang/lib/Lower/Support/ReductionProcessor.cpp
+++ b/flang/lib/Lower/Support/ReductionProcessor.cpp
@@ -598,26 +598,6 @@ DeclareRedType ReductionProcessor::createDeclareReductionHelper(
genCombinerCB(builder, loc, type, op1, op2, isByRef);
if (isByRef && fir::isa_box_type(valTy)) {
- bool isBoxReductionSupported = [&]() {
- auto offloadMod = llvm::dyn_cast<mlir::omp::OffloadModuleInterface>(
- *builder.getModule());
-
- // This check tests the implementation status on the GPU. Box reductions
- // are fully supported on the CPU.
- if (!offloadMod.getIsGPU())
- return true;
-
- auto seqTy = mlir::dyn_cast<fir::SequenceType>(boxedTy);
-
- // Dynamically-shaped arrays are not supported yet on the GPU.
- return !seqTy || !fir::sequenceWithNonConstantShape(seqTy);
- }();
-
- if (!isBoxReductionSupported) {
- TODO(loc, "Reduction of dynamically-shaped arrays are not supported yet "
- "on the GPU.");
- }
-
mlir::Region &dataPtrPtrRegion = decl.getDataPtrPtrRegion();
mlir::Block &dataAddrBlock = *builder.createBlock(
&dataPtrPtrRegion, dataPtrPtrRegion.end(), {type}, {loc});
diff --git a/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp b/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp
index 3031bb5da6919..d0bd09e1d1e84 100644
--- a/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp
+++ b/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/FIRDialect.h"
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
#include "flang/Optimizer/OpenMP/Passes.h"
@@ -101,6 +102,29 @@ class FunctionFilteringPass
}
return WalkResult::advance();
});
+
+ if (op.getIsGPU())
+ op->walk<WalkOrder::PreOrder>([&](omp::DeclareReductionOp redOp) {
+ if (redOp.symbolKnownUseEmpty(op))
+ return WalkResult::advance();
+
+ if (!redOp.getByrefElementType())
+ return WalkResult::advance();
+
+ auto seqTy =
+ mlir::dyn_cast<fir::SequenceType>(*redOp.getByrefElementType());
+
+ bool isByRefReductionSupported =
+ !seqTy || !fir::sequenceWithNonConstantShape(seqTy);
+
+ if (!isByRefReductionSupported) {
+ TODO(redOp.getLoc(),
+ "Reduction of dynamically-shaped arrays are not supported yet "
+ "on the GPU.");
+ }
+
+ return WalkResult::advance();
+ });
}
};
} // namespace
>From 7b958fb8031fd05f8247c53afde2937a6218e395 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Tue, 20 Jan 2026 01:24:25 -0600
Subject: [PATCH 2/3] review comments and test
---
.../Optimizer/OpenMP/FunctionFiltering.cpp | 51 +++++++++++--------
.../omp-function-filtering-todo.mlir | 33 ++++++++++++
2 files changed, 62 insertions(+), 22 deletions(-)
create mode 100644 flang/test/Transforms/omp-function-filtering-todo.mlir
diff --git a/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp b/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp
index d0bd09e1d1e84..62101a61049fd 100644
--- a/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp
+++ b/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp
@@ -30,6 +30,34 @@ namespace flangomp {
using namespace mlir;
namespace {
+void checkDeviceImplementationStatus(
+ omp::OffloadModuleInterface offloadModule) {
+ if (!offloadModule.getIsGPU())
+ return;
+
+ offloadModule->walk<WalkOrder::PreOrder>([&](omp::DeclareReductionOp redOp) {
+ if (redOp.symbolKnownUseEmpty(offloadModule))
+ return WalkResult::advance();
+
+ if (!redOp.getByrefElementType())
+ return WalkResult::advance();
+
+ auto seqTy =
+ mlir::dyn_cast<fir::SequenceType>(*redOp.getByrefElementType());
+
+ bool isByRefReductionSupported =
+ !seqTy || !fir::sequenceWithNonConstantShape(seqTy);
+
+ if (!isByRefReductionSupported) {
+ TODO(redOp.getLoc(),
+ "Reduction of dynamically-shaped arrays are not supported yet "
+ "on the GPU.");
+ }
+
+ return WalkResult::advance();
+ });
+}
+
class FunctionFilteringPass
: public flangomp::impl::FunctionFilteringPassBase<FunctionFilteringPass> {
public:
@@ -103,28 +131,7 @@ class FunctionFilteringPass
return WalkResult::advance();
});
- if (op.getIsGPU())
- op->walk<WalkOrder::PreOrder>([&](omp::DeclareReductionOp redOp) {
- if (redOp.symbolKnownUseEmpty(op))
- return WalkResult::advance();
-
- if (!redOp.getByrefElementType())
- return WalkResult::advance();
-
- auto seqTy =
- mlir::dyn_cast<fir::SequenceType>(*redOp.getByrefElementType());
-
- bool isByRefReductionSupported =
- !seqTy || !fir::sequenceWithNonConstantShape(seqTy);
-
- if (!isByRefReductionSupported) {
- TODO(redOp.getLoc(),
- "Reduction of dynamically-shaped arrays are not supported yet "
- "on the GPU.");
- }
-
- return WalkResult::advance();
- });
+ checkDeviceImplementationStatus(op);
}
};
} // namespace
diff --git a/flang/test/Transforms/omp-function-filtering-todo.mlir b/flang/test/Transforms/omp-function-filtering-todo.mlir
new file mode 100644
index 0000000000000..c5640bb9757f7
--- /dev/null
+++ b/flang/test/Transforms/omp-function-filtering-todo.mlir
@@ -0,0 +1,33 @@
+// RUN: not fir-opt --omp-function-filtering -o - %s 2>&1 | FileCheck %s
+
+module attributes {omp.is_gpu = true, omp.is_target_device = true} {
+ // CHECK: not yet implemented: Reduction of dynamically-shaped arrays are not supported yet on the GPU.
+ omp.declare_reduction @add_reduction_byref_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> attributes {byref_element_type = !fir.array<?xi32>} alloc {
+ %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
+ omp.yield(%0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+ } init {
+ ^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>):
+ omp.yield(%arg1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+ } combiner {
+ ^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>):
+ omp.yield(%arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+ }
+
+ func.func @foo(%ia : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
+ %ia.map = omp.map.info var_ptr(%ia : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(always, implicit, to) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "ia"}
+
+ omp.target map_entries(%ia.map -> %arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
+ omp.parallel {
+ %c1_i32 = arith.constant 1 : i32
+ omp.wsloop reduction(byref @add_reduction_byref_box_heap_Uxi32 %arg0 -> %arg1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
+ omp.loop_nest (%arg2) : i32 = (%c1_i32) to (%c1_i32) inclusive step (%c1_i32) {
+ omp.yield
+ }
+ }
+ omp.terminator
+ }
+ omp.terminator
+ }
+ return
+ }
+}
>From 763f0017a3b415288eaea8ebfd01e5270e697f75 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Wed, 21 Jan 2026 05:58:00 -0600
Subject: [PATCH 3/3] review comments
---
flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp b/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp
index 62101a61049fd..e58d5b7e7a389 100644
--- a/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp
+++ b/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp
@@ -29,7 +29,14 @@ namespace flangomp {
using namespace mlir;
-namespace {
+/// This function triggers TODO errors and halts compilation if it detects
+/// patterns representing unimplemented features.
+///
+/// It exclusively checks situations that cannot be detected after all of the
+/// MLIR pipeline has run (i.e. at the MLIR to LLVM IR translation stage, where
+/// the preferred location for these types of checks is), and it only checks for
+/// features that have not been implemented for target offload, but are
+/// supported on host execution.
void checkDeviceImplementationStatus(
omp::OffloadModuleInterface offloadModule) {
if (!offloadModule.getIsGPU())
@@ -58,6 +65,7 @@ void checkDeviceImplementationStatus(
});
}
+namespace {
class FunctionFilteringPass
: public flangomp::impl::FunctionFilteringPassBase<FunctionFilteringPass> {
public:
More information about the flang-commits
mailing list