[flang-commits] [flang] 25c95eb - [flang][fir] Convert `fir.do_loop` with the unordered attribute to `scf.parallel`. (#168510)
via flang-commits
flang-commits at lists.llvm.org
Tue Nov 25 06:43:46 PST 2025
Author: Ming Yan
Date: 2025-11-25T14:43:41Z
New Revision: 25c95ebfa82e2f6a20cf1282aaef09d1cc598ee7
URL: https://github.com/llvm/llvm-project/commit/25c95ebfa82e2f6a20cf1282aaef09d1cc598ee7
DIFF: https://github.com/llvm/llvm-project/commit/25c95ebfa82e2f6a20cf1282aaef09d1cc598ee7.diff
LOG: [flang][fir] Convert `fir.do_loop` with the unordered attribute to `scf.parallel`. (#168510)
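Refines the existing FIR-to-SCF conversion so that a `fir.do_loop` annotated
with `unordered` can be lowered to `scf.parallel` when the new
`parallel-unordered` pass option is enabled (it is off by default), while all
other loops retain their original `scf.for` lowering.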
Added:
Modified:
flang/include/flang/Optimizer/Transforms/Passes.h
flang/include/flang/Optimizer/Transforms/Passes.td
flang/lib/Optimizer/Transforms/FIRToSCF.cpp
flang/test/Fir/FirToSCF/do-loop.fir
Removed:
################################################################################
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index 6f5dff4687cbb..f83a1559fa016 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -53,6 +53,9 @@ std::unique_ptr<mlir::Pass> createVScaleAttrPass();
std::unique_ptr<mlir::Pass>
createVScaleAttrPass(std::pair<unsigned, unsigned> vscaleAttr);
+void populateFIRToSCFRewrites(mlir::RewritePatternSet &patterns,
+ bool parallelUnordered = false);
+
void populateCfgConversionRewrites(mlir::RewritePatternSet &patterns,
bool forceLoopToExecuteOnce = false,
bool setNSW = true);
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index bb2509b1747d5..0f613584c6e17 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -85,6 +85,10 @@ def FIRToSCFPass : Pass<"fir-to-scf"> {
let dependentDialects = [
"fir::FIROpsDialect", "mlir::scf::SCFDialect"
];
+ let options = [Option<"parallelUnordered", "parallel-unordered", "bool",
+ /*default=*/"false",
+ "Allow converting a fir.do_loop with the `unordered` "
+ "attribute to scf.parallel (experimental).">];
}
def AnnotateConstantOperands : Pass<"annotate-constant"> {
diff --git a/flang/lib/Optimizer/Transforms/FIRToSCF.cpp b/flang/lib/Optimizer/Transforms/FIRToSCF.cpp
index 70d6ebbcb039c..e72ee333101f5 100644
--- a/flang/lib/Optimizer/Transforms/FIRToSCF.cpp
+++ b/flang/lib/Optimizer/Transforms/FIRToSCF.cpp
@@ -25,11 +25,18 @@ class FIRToSCFPass : public fir::impl::FIRToSCFPassBase<FIRToSCFPass> {
struct DoLoopConversion : public mlir::OpRewritePattern<fir::DoLoopOp> {
using OpRewritePattern<fir::DoLoopOp>::OpRewritePattern;
+ DoLoopConversion(mlir::MLIRContext *context,
+ bool parallelUnorderedLoop = false,
+ mlir::PatternBenefit benefit = 1)
+ : OpRewritePattern<fir::DoLoopOp>(context, benefit),
+ parallelUnorderedLoop(parallelUnorderedLoop) {}
+
mlir::LogicalResult
matchAndRewrite(fir::DoLoopOp doLoopOp,
mlir::PatternRewriter &rewriter) const override {
mlir::Location loc = doLoopOp.getLoc();
bool hasFinalValue = doLoopOp.getFinalValue().has_value();
+ bool isUnordered = doLoopOp.getUnordered().has_value();
// Get loop values from the DoLoopOp
mlir::Value low = doLoopOp.getLowerBound();
@@ -53,39 +60,54 @@ struct DoLoopConversion : public mlir::OpRewritePattern<fir::DoLoopOp> {
mlir::arith::DivSIOp::create(rewriter, loc, distance, step);
auto zero = mlir::arith::ConstantIndexOp::create(rewriter, loc, 0);
auto one = mlir::arith::ConstantIndexOp::create(rewriter, loc, 1);
- auto scfForOp =
- mlir::scf::ForOp::create(rewriter, loc, zero, tripCount, one, iterArgs);
+ // Create the scf.for or scf.parallel operation
+ mlir::Operation *scfLoopOp = nullptr;
+ if (isUnordered && parallelUnorderedLoop) {
+ scfLoopOp = mlir::scf::ParallelOp::create(rewriter, loc, {zero},
+ {tripCount}, {one}, iterArgs);
+ } else {
+ scfLoopOp = mlir::scf::ForOp::create(rewriter, loc, zero, tripCount, one,
+ iterArgs);
+ }
+
+ // Move the body of the fir.do_loop to the scf.for or scf.parallel
auto &loopOps = doLoopOp.getBody()->getOperations();
auto resultOp =
mlir::cast<fir::ResultOp>(doLoopOp.getBody()->getTerminator());
auto results = resultOp.getOperands();
- mlir::Block *loweredBody = scfForOp.getBody();
+ auto scfLoopLikeOp = mlir::cast<mlir::LoopLikeOpInterface>(scfLoopOp);
+ mlir::Block &scfLoopBody = scfLoopLikeOp.getLoopRegions().front()->front();
- loweredBody->getOperations().splice(loweredBody->begin(), loopOps,
- loopOps.begin(),
- std::prev(loopOps.end()));
+ scfLoopBody.getOperations().splice(scfLoopBody.begin(), loopOps,
+ loopOps.begin(),
+ std::prev(loopOps.end()));
- rewriter.setInsertionPointToStart(loweredBody);
+ rewriter.setInsertionPointToStart(&scfLoopBody);
mlir::Value iv = mlir::arith::MulIOp::create(
- rewriter, loc, scfForOp.getInductionVar(), step);
+ rewriter, loc, scfLoopLikeOp.getSingleInductionVar().value(), step);
iv = mlir::arith::AddIOp::create(rewriter, loc, low, iv);
if (!results.empty()) {
- rewriter.setInsertionPointToEnd(loweredBody);
+ rewriter.setInsertionPointToEnd(&scfLoopBody);
mlir::scf::YieldOp::create(rewriter, resultOp->getLoc(), results);
}
doLoopOp.getInductionVar().replaceAllUsesWith(iv);
- rewriter.replaceAllUsesWith(doLoopOp.getRegionIterArgs(),
- hasFinalValue
- ? scfForOp.getRegionIterArgs().drop_front()
- : scfForOp.getRegionIterArgs());
-
- // Copy all the attributes from the old to new op.
- scfForOp->setAttrs(doLoopOp->getAttrs());
- rewriter.replaceOp(doLoopOp, scfForOp);
+ rewriter.replaceAllUsesWith(
+ doLoopOp.getRegionIterArgs(),
+ hasFinalValue ? scfLoopLikeOp.getRegionIterArgs().drop_front()
+ : scfLoopLikeOp.getRegionIterArgs());
+
+ // Copy loop annotations from the fir.do_loop to scf loop op.
+ if (auto ann = doLoopOp.getLoopAnnotation())
+ scfLoopOp->setAttr("loop_annotation", *ann);
+
+ rewriter.replaceOp(doLoopOp, scfLoopOp);
return mlir::success();
}
+
+private:
+ bool parallelUnorderedLoop;
};
struct IterWhileConversion : public mlir::OpRewritePattern<fir::IterWhileOp> {
@@ -197,10 +219,15 @@ struct IfConversion : public mlir::OpRewritePattern<fir::IfOp> {
};
} // namespace
+void fir::populateFIRToSCFRewrites(mlir::RewritePatternSet &patterns,
+ bool parallelUnordered) {
+ patterns.add<IterWhileConversion, IfConversion>(patterns.getContext());
+ patterns.add<DoLoopConversion>(patterns.getContext(), parallelUnordered);
+}
+
void FIRToSCFPass::runOnOperation() {
mlir::RewritePatternSet patterns(&getContext());
- patterns.add<DoLoopConversion, IterWhileConversion, IfConversion>(
- patterns.getContext());
+ fir::populateFIRToSCFRewrites(patterns, parallelUnordered);
walkAndApplyPatterns(getOperation(), std::move(patterns));
}
diff --git a/flang/test/Fir/FirToSCF/do-loop.fir b/flang/test/Fir/FirToSCF/do-loop.fir
index 812497c8d0c74..8862a4c2969e8 100644
--- a/flang/test/Fir/FirToSCF/do-loop.fir
+++ b/flang/test/Fir/FirToSCF/do-loop.fir
@@ -1,4 +1,5 @@
-// RUN: fir-opt %s --fir-to-scf | FileCheck %s
+// RUN: fir-opt %s --fir-to-scf --split-input-file | FileCheck %s --check-prefixes=CHECK,NO-PARALLEL
+// RUN: fir-opt %s --fir-to-scf='parallel-unordered' --split-input-file | FileCheck %s --check-prefixes=CHECK,PARALLEL
// CHECK-LABEL: func.func @simple_loop(
// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<100xi32>>) {
@@ -31,6 +32,8 @@ func.func @simple_loop(%arg0: !fir.ref<!fir.array<100xi32>>) {
return
}
+// -----
+
// CHECK-LABEL: func.func @loop_with_negtive_step(
// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<100xi32>>) {
// CHECK: %[[VAL_0:.*]] = arith.constant 100 : index
@@ -64,6 +67,8 @@ func.func @loop_with_negtive_step(%arg0: !fir.ref<!fir.array<100xi32>>) {
return
}
+// -----
+
// CHECK-LABEL: func.func @loop_with_results(
// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<100xi32>>,
// CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32>) {
@@ -102,6 +107,8 @@ func.func @loop_with_results(%arg0: !fir.ref<!fir.array<100xi32>>, %arg1: !fir.r
return
}
+// -----
+
// CHECK-LABEL: func.func @loop_with_final_value(
// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<100xi32>>,
// CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32>) {
@@ -146,6 +153,45 @@ func.func @loop_with_final_value(%arg0: !fir.ref<!fir.array<100xi32>>, %arg1: !f
return
}
+// -----
+
+// CHECK-LABEL: func.func @loop_with_unordered_attr(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<100xi32>>) {
+// CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index
+// CHECK: %[[CONSTANT_1:.*]] = arith.constant 100 : index
+// CHECK: %[[SHAPE_0:.*]] = fir.shape %[[CONSTANT_1]] : (index) -> !fir.shape<1>
+// CHECK: %[[CONSTANT_2:.*]] = arith.constant 1 : i32
+// CHECK: %[[SUBI_0:.*]] = arith.subi %[[CONSTANT_1]], %[[CONSTANT_0]] : index
+// CHECK: %[[ADDI_0:.*]] = arith.addi %[[SUBI_0]], %[[CONSTANT_0]] : index
+// CHECK: %[[DIVSI_0:.*]] = arith.divsi %[[ADDI_0]], %[[CONSTANT_0]] : index
+// CHECK: %[[CONSTANT_3:.*]] = arith.constant 0 : index
+// CHECK: %[[CONSTANT_4:.*]] = arith.constant 1 : index
+// PARALLEL: scf.parallel (%[[VAL_0:.*]]) = (%[[CONSTANT_3]]) to (%[[DIVSI_0]]) step (%[[CONSTANT_4]]) {
+// NO-PARALLEL: scf.for %[[VAL_0:.*]] = %[[CONSTANT_3]] to %[[DIVSI_0]] step %[[CONSTANT_4]] {
+// CHECK: %[[MULI_0:.*]] = arith.muli %[[VAL_0]], %[[CONSTANT_0]] : index
+// CHECK: %[[ADDI_1:.*]] = arith.addi %[[CONSTANT_0]], %[[MULI_0]] : index
+// CHECK: %[[ARRAY_COOR_0:.*]] = fir.array_coor %[[ARG0]](%[[SHAPE_0]]) %[[ADDI_1]] : (!fir.ref<!fir.array<100xi32>>, !fir.shape<1>, index) -> !fir.ref<i32>
+// CHECK: fir.store %[[CONSTANT_2]] to %[[ARRAY_COOR_0]] : !fir.ref<i32>
+// PARALLEL: scf.reduce
+// CHECK: }
+// CHECK: return
+// CHECK: }
+func.func @loop_with_unordered_attr(%arg0: !fir.ref<!fir.array<100xi32>>) {
+ %c1 = arith.constant 1 : index
+ %c100 = arith.constant 100 : index
+ %0 = fir.shape %c100 : (index) -> !fir.shape<1>
+ %c1_i32 = arith.constant 1 : i32
+ fir.do_loop %arg1 = %c1 to %c100 step %c1 unordered {
+ %1 = fir.array_coor %arg0(%0) %arg1 : (!fir.ref<!fir.array<100xi32>>, !fir.shape<1>, index) -> !fir.ref<i32>
+ fir.store %c1_i32 to %1 : !fir.ref<i32>
+ }
+ return
+}
+
+// -----
+
+// CHECK: #[[$ATTR_0:.+]] = #llvm.loop_vectorize<disable = false>
+// CHECK: #[[$ATTR_1:.+]] = #llvm.loop_annotation<vectorize = #[[$ATTR_0]]>
// CHECK-LABEL: func.func @loop_with_attribute(
// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<100xi32>>,
// CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32>) {
@@ -167,16 +213,19 @@ func.func @loop_with_final_value(%arg0: !fir.ref<!fir.array<100xi32>>, %arg1: !f
// CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
// CHECK: %[[VAL_16:.*]] = arith.addi %[[VAL_15]], %[[VAL_14]] : i32
// CHECK: fir.store %[[VAL_16]] to %[[VAL_3]] : !fir.ref<i32>
-// CHECK: } {operandSegmentSizes = array<i32: 1, 1, 1, 1, 0>, reduceAttrs = [#fir.reduce_attr<add>]}
+// CHECK: } {loop_annotation = #[[$ATTR_1]]}
// CHECK: return
// CHECK: }
+
+#loop_vectorize = #llvm.loop_vectorize<disable = false>
+#loop_annotation = #llvm.loop_annotation<vectorize = #loop_vectorize>
func.func @loop_with_attribute(%arg0: !fir.ref<!fir.array<100xi32>>, %arg1: !fir.ref<i32>) {
%c1 = arith.constant 1 : index
%c0_i32 = arith.constant 0 : i32
%c100 = arith.constant 100 : index
%0 = fir.alloca i32
%1 = fir.shape %c100 : (index) -> !fir.shape<1>
- fir.do_loop %arg2 = %c1 to %c100 step %c1 reduce(#fir.reduce_attr<add> -> %0 : !fir.ref<i32>) {
+ fir.do_loop %arg2 = %c1 to %c100 step %c1 attributes {loopAnnotation = #loop_annotation} {
%2 = fir.array_coor %arg0(%1) %arg2 : (!fir.ref<!fir.array<100xi32>>, !fir.shape<1>, index) -> !fir.ref<i32>
%3 = fir.load %2 : !fir.ref<i32>
%4 = fir.load %0 : !fir.ref<i32>
@@ -187,6 +236,8 @@ func.func @loop_with_attribute(%arg0: !fir.ref<!fir.array<100xi32>>, %arg1: !fir
return
}
+// -----
+
// CHECK-LABEL: func.func @nested_loop(
// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<100x100xi32>>) {
// CHECK: %[[VAL_0:.*]] = arith.constant 1 : index
More information about the flang-commits
mailing list