[llvm-branch-commits] [flang] [flang][fir] Basci lowering `fir.do_concurrent` locality specs to `fir.do_loop ... unordered` (PR #138512)
Kareem Ergawy via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon May 5 04:56:03 PDT 2025
https://github.com/ergawy created https://github.com/llvm/llvm-project/pull/138512
Extends lowering `fir.do_concurrent` to `fir.do_loop ... unordered` by adding support for locality specifiers.
In particular, for `local` specifiers, a `fir.alloca` op is created using the localizer type. For `local_init` specifiers, the `copy` region is additionally inlined in the `do concurrent` loop's body.
>From 07c29b389e0edd2dbe59f2ce38712429f067ac00 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Mon, 5 May 2025 06:50:49 -0500
Subject: [PATCH] [flang][fir] Basci lowering `fir.do_concurrent` locality
specs to `fir.do_loop ... unordered`
Extends lowering `fir.do_concurrent` to `fir.do_loop ... unordered` by
adding support for locality specifiers.
In particular, for `local` specifiers, a `fir.alloca` op is created
using the localizer type. For `local_init` specifiers, the `copy` region
is additionally inlined in the `do concurrent` loop's body.
---
.../Transforms/SimplifyFIROperations.cpp | 58 +++++++++++++++++-
.../do_concurrent-to-do_loop-unodered.fir | 61 +++++++++++++++++++
2 files changed, 118 insertions(+), 1 deletion(-)
diff --git a/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp b/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp
index 6d106046b70f2..e2dc4e14ff650 100644
--- a/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp
+++ b/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp
@@ -149,6 +149,17 @@ mlir::LogicalResult BoxTotalElementsConversion::matchAndRewrite(
class DoConcurrentConversion
: public mlir::OpRewritePattern<fir::DoConcurrentOp> {
+ /// Looks up from the operation from and returns the LocalitySpecifierOp with
+ /// name symbolName
+ static fir::LocalitySpecifierOp
+ findLocalizer(mlir::Operation *from, mlir::SymbolRefAttr symbolName) {
+ fir::LocalitySpecifierOp localizer =
+ mlir::SymbolTable::lookupNearestSymbolFrom<fir::LocalitySpecifierOp>(
+ from, symbolName);
+ assert(localizer && "localizer not found in the symbol table");
+ return localizer;
+ }
+
public:
using mlir::OpRewritePattern<fir::DoConcurrentOp>::OpRewritePattern;
@@ -162,7 +173,52 @@ class DoConcurrentConversion
assert(loop.getRegion().hasOneBlock());
mlir::Block &loopBlock = loop.getRegion().getBlocks().front();
- // Collect iteration variable(s) allocations do that we can move them
+ // Handle localization
+ if (!loop.getLocalVars().empty()) {
+ mlir::OpBuilder::InsertionGuard guard(rewriter);
+ rewriter.setInsertionPointToStart(&loop.getRegion().front());
+
+ std::optional<mlir::ArrayAttr> localSyms = loop.getLocalSyms();
+
+ for (auto [localVar, localArg, localizerSym] : llvm::zip_equal(
+ loop.getLocalVars(), loop.getRegionLocalArgs(), *localSyms)) {
+ mlir::SymbolRefAttr localizerName =
+ llvm::cast<mlir::SymbolRefAttr>(localizerSym);
+ fir::LocalitySpecifierOp localizer = findLocalizer(loop, localizerName);
+
+ mlir::Value localAlloc =
+ rewriter.create<fir::AllocaOp>(loop.getLoc(), localizer.getType());
+
+ if (localizer.getLocalitySpecifierType() ==
+ fir::LocalitySpecifierType::LocalInit) {
+ // It is reasonable to make this assumption since, at this stage,
+ // control-flow ops are not converted yet. Therefore, things like `if`
+ // conditions will still be represented by their encapsulating `fir`
+ // dialect ops.
+ assert(localizer.getCopyRegion().hasOneBlock() &&
+ "Expected localizer to have a single block.");
+ mlir::Block *beforeLocalInit = rewriter.getInsertionBlock();
+ mlir::Block *afterLocalInit = rewriter.splitBlock(
+ rewriter.getInsertionBlock(), rewriter.getInsertionPoint());
+ rewriter.cloneRegionBefore(localizer.getCopyRegion(), afterLocalInit);
+ mlir::Block *copyRegionBody = beforeLocalInit->getNextNode();
+
+ rewriter.eraseOp(copyRegionBody->getTerminator());
+ rewriter.mergeBlocks(afterLocalInit, copyRegionBody);
+ rewriter.mergeBlocks(copyRegionBody, beforeLocalInit,
+ {localVar, localArg});
+ }
+
+ rewriter.replaceAllUsesWith(localArg, localAlloc);
+ }
+
+ loop.getRegion().front().eraseArguments(loop.getNumInductionVars(),
+ loop.getNumLocalOperands());
+ loop.getLocalVarsMutable().clear();
+ loop.setLocalSymsAttr(nullptr);
+ }
+
+ // Collect iteration variable(s) allocations so that we can move them
// outside the `fir.do_concurrent` wrapper.
llvm::SmallVector<mlir::Operation *> opsToMove;
for (mlir::Operation &op : llvm::drop_end(wrapperBlock))
diff --git a/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir b/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir
index d2ceafdda5b22..d9ef36b175598 100644
--- a/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir
+++ b/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir
@@ -121,3 +121,64 @@ func.func @dc_2d_reduction(%i_lb: index, %i_ub: index, %i_st: index,
// CHECK: }
// CHECK: return
// CHECK: }
+
+// -----
+
+fir.local {type = local} @local_localizer : i32
+
+fir.local {type = local_init} @local_init_localizer : i32 copy {
+^bb0(%arg0: !fir.ref<i32>, %arg1: !fir.ref<i32>):
+ %0 = fir.load %arg0 : !fir.ref<i32>
+ fir.store %0 to %arg1 : !fir.ref<i32>
+ fir.yield(%arg1 : !fir.ref<i32>)
+}
+
+func.func @do_concurrent_locality_specs() {
+ %3 = fir.alloca i32 {bindc_name = "local_init_var", uniq_name = "_QFdo_concurrentElocal_init_var"}
+ %4:2 = hlfir.declare %3 {uniq_name = "_QFdo_concurrentElocal_init_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %5 = fir.alloca i32 {bindc_name = "local_var", uniq_name = "_QFdo_concurrentElocal_var"}
+ %6:2 = hlfir.declare %5 {uniq_name = "_QFdo_concurrentElocal_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %c1 = arith.constant 1 : index
+ %c10 = arith.constant 1 : index
+ fir.do_concurrent {
+ %9 = fir.alloca i32 {bindc_name = "i"}
+ %10:2 = hlfir.declare %9 {uniq_name = "_QFdo_concurrentEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ fir.do_concurrent.loop (%arg0) = (%c1) to (%c10) step (%c1) local(@local_localizer %6#0 -> %arg1, @local_init_localizer %4#0 -> %arg2 : !fir.ref<i32>, !fir.ref<i32>) {
+ %11 = fir.convert %arg0 : (index) -> i32
+ fir.store %11 to %10#0 : !fir.ref<i32>
+ %13:2 = hlfir.declare %arg1 {uniq_name = "_QFdo_concurrentElocal_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %15:2 = hlfir.declare %arg2 {uniq_name = "_QFdo_concurrentElocal_init_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %17 = fir.load %10#0 : !fir.ref<i32>
+ %c5_i32 = arith.constant 5 : i32
+ %18 = arith.cmpi slt, %17, %c5_i32 : i32
+ fir.if %18 {
+ %c42_i32 = arith.constant 42 : i32
+ hlfir.assign %c42_i32 to %13#0 : i32, !fir.ref<i32>
+ } else {
+ %c84_i32 = arith.constant 84 : i32
+ hlfir.assign %c84_i32 to %15#0 : i32, !fir.ref<i32>
+ }
+ }
+ }
+ return
+}
+
+// CHECK-LABEL: func.func @do_concurrent_locality_specs() {
+// CHECK: %[[LOC_INIT_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "{{.*}}Elocal_init_var"}
+// CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered {
+// Verify localization of the `local` var.
+// CHECK: %[[PRIV_LOC_ALLOC:.*]] = fir.alloca i32
+
+// Verify localization of the `local_init` var.
+// CHECK: %[[PRIV_LOC_INIT_ALLOC:.*]] = fir.alloca i32
+// CHECK: %[[LOC_INIT_VAL:.*]] = fir.load %[[LOC_INIT_DECL]]#0 : !fir.ref<i32>
+// CHECK: fir.store %[[LOC_INIT_VAL]] to %[[PRIV_LOC_INIT_ALLOC]] : !fir.ref<i32>
+
+// CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[PRIV_LOC_ALLOC]]
+// CHECK: %[[VAL_16:.*]]:2 = hlfir.declare %[[PRIV_LOC_INIT_ALLOC]]
+
+// CHECK: hlfir.assign %{{.*}} to %[[VAL_15]]#0 : i32, !fir.ref<i32>
+// CHECK: hlfir.assign %{{.*}} to %[[VAL_16]]#0 : i32, !fir.ref<i32>
+// CHECK: }
+// CHECK: return
+// CHECK: }
More information about the llvm-branch-commits
mailing list