[flang-commits] [mlir] [flang] [flang][openacc] Add loop expand pass (PR #74045)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Thu Nov 30 22:42:40 PST 2023
https://github.com/clementval created https://github.com/llvm/llvm-project/pull/74045
`acc.loop` operations get a new design where the induction ranges can be part of the operation directly. The operation can also be a simple wrapper around a loop nest.
This patch adds a loop-expand pass that can transform an acc.loop operation with induction ranges into an acc.loop operation with a fir.do_loop nest inside.
This patch depends on two patches that update the design and lowering:
https://github.com/llvm/llvm-project/pull/67355
https://github.com/llvm/llvm-project/pull/65417
>From 1b3af4267729441f410ac39d055e1bfdd0089957 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 17 Nov 2023 23:08:22 -0800
Subject: [PATCH] [flang][openacc] Add loop expand pass
---
.../flang/Optimizer/Transforms/Passes.h | 3 +
.../flang/Optimizer/Transforms/Passes.td | 8 +
flang/lib/Optimizer/Transforms/CMakeLists.txt | 1 +
.../Transforms/OpenACCLoopExpand.cpp | 170 ++++++++++++++++++
flang/test/Fir/OpenACC/loop-expand.f90 | 118 ++++++++++++
.../mlir/Dialect/OpenACC/OpenACCOps.td | 4 +-
6 files changed, 303 insertions(+), 1 deletion(-)
create mode 100644 flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp
create mode 100644 flang/test/Fir/OpenACC/loop-expand.f90
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index 92bc7246eca7005..6320690a785a85e 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -45,6 +45,7 @@ namespace fir {
#define GEN_PASS_DECL_ALGEBRAICSIMPLIFICATION
#define GEN_PASS_DECL_POLYMORPHICOPCONVERSION
#define GEN_PASS_DECL_OPENACCDATAOPERANDCONVERSION
+#define GEN_PASS_DECL_OPENACCLOOPEXPAND
#include "flang/Optimizer/Transforms/Passes.h.inc"
std::unique_ptr<mlir::Pass> createAbstractResultOnFuncOptPass();
@@ -79,6 +80,8 @@ std::unique_ptr<mlir::Pass> createOMPFunctionFilteringPass();
std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
createOMPMarkDeclareTargetPass();
+std::unique_ptr<mlir::Pass> createOpenACCLoopExpandPass();
+
std::unique_ptr<mlir::Pass> createVScaleAttrPass();
std::unique_ptr<mlir::Pass>
createVScaleAttrPass(std::pair<unsigned, unsigned> vscaleAttr);
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index c3768fd2d689c1a..c9f707ba084cb02 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -335,6 +335,14 @@ def OMPFunctionFiltering : Pass<"omp-function-filtering"> {
];
}
+def OpenACCLoopExpand : Pass<"acc-loop-expand", "mlir::func::FuncOp"> {
+  let summary = "Expand acc.loop induction ranges into a fir.do_loop nest";
+  let description = [{
+    Transform an `acc.loop` operation that carries induction ranges (lower
+    bounds, upper bounds and steps) into an `acc.loop` operation whose region
+    holds a `fir.do_loop` nest built from those ranges.
+  }];
+  let constructor = "::fir::createOpenACCLoopExpandPass()";
+  let dependentDialects = [
+    "fir::FIROpsDialect"
+  ];
+}
+
def VScaleAttr : Pass<"vscale-attr", "mlir::func::FuncOp"> {
let summary = "Add vscale_range attribute to functions";
let description = [{
diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index 03b67104a93b575..03303ee14d91790 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -19,6 +19,7 @@ add_flang_library(FIRTransforms
LoopVersioning.cpp
OMPFunctionFiltering.cpp
OMPMarkDeclareTarget.cpp
+ OpenACCLoopExpand.cpp
VScaleAttr.cpp
DEPENDS
diff --git a/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp b/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp
new file mode 100644
index 000000000000000..3d712c9ea8d7a41
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp
@@ -0,0 +1,170 @@
+//===- OpenACCLoopExpand.cpp - expand acc.loop operand to fir.do_loop nest ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/Support/FIRContext.h"
+#include "flang/Optimizer/Dialect/Support/KindMapping.h"
+#include "flang/Optimizer/Transforms/Passes.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/DialectConversion.h"
+#include "llvm/ADT/STLExtras.h"
+
+namespace fir {
+#define GEN_PASS_DEF_OPENACCLOOPEXPAND
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+namespace {
+/// Pass that expands the induction ranges carried by `acc.loop` operations
+/// into a `fir.do_loop` nest placed inside the `acc.loop` region.
+///
+/// The class lives in an anonymous namespace so that it has internal linkage
+/// and cannot collide with a `LoopExpand` defined in another translation unit.
+class LoopExpand : public fir::impl::OpenACCLoopExpandBase<LoopExpand> {
+public:
+  /// Rewrite every `acc.loop` nested in the operation the pass runs on.
+  void runOnOperation() override;
+};
+} // namespace
+
+/// Look up \p value among the private operands of \p op.
+///
+/// When one of the private operands is \p value, return the `varPtr` of the
+/// `acc.private` operation defining it. Return a null value when \p value is
+/// not one of the private operands.
+static mlir::Value retrievePrivatizedIv(mlir::acc::LoopOp &op,
+                                        mlir::Value value) {
+  for (mlir::Value privatized : op.getPrivateOperands()) {
+    if (privatized != value)
+      continue;
+    // The matching operand is expected to be produced by an acc.private op.
+    mlir::Operation *definingOp = privatized.getDefiningOp();
+    return mlir::cast<mlir::acc::PrivateOp>(definingOp).getVarPtr();
+  }
+  return mlir::Value();
+}
+
+/// Drop the lower bound, upper bound and step operands of \p accLoopOp.
+///
+/// \p builder is currently unused. Each mutable range is queried immediately
+/// before it is cleared, one range at a time.
+static void clearInductionRangesAndAttrs(fir::FirOpBuilder &builder,
+                                         mlir::acc::LoopOp &accLoopOp) {
+  mlir::MutableOperandRange lowerBounds = accLoopOp.getLowerboundMutable();
+  lowerBounds.clear();
+
+  mlir::MutableOperandRange upperBounds = accLoopOp.getUpperboundMutable();
+  upperBounds.clear();
+
+  mlir::MutableOperandRange strides = accLoopOp.getStepMutable();
+  strides.clear();
+}
+
+/// Collect one original (non-privatized) induction variable per block argument
+/// of the `acc.loop` body.
+///
+/// For each body argument, every `fir.store` using the argument is erased and
+/// its memref is remembered. That memref is then looked up among the private
+/// operands of \p accLoopOp to recover the variable behind the privatization.
+/// Asserts when no such variable can be found for an argument.
+///
+/// \returns the recovered variables, in block-argument order.
+static llvm::SmallVector<mlir::Value>
+getOriginalInductionVars(mlir::acc::LoopOp &accLoopOp) {
+  llvm::SmallVector<mlir::Value> ivs;
+  ivs.reserve(accLoopOp.getBody().getNumArguments());
+  for (mlir::BlockArgument arg : accLoopOp.getBody().getArguments()) {
+    mlir::Value privateValue;
+    // Erasing the store removes the use being visited from the use list, so
+    // the iterator must be advanced before the loop body runs.
+    for (mlir::OpOperand &u : llvm::make_early_inc_range(arg.getUses())) {
+      mlir::Operation *owner = u.getOwner();
+      if (auto storeOp = mlir::dyn_cast<fir::StoreOp>(owner)) {
+        privateValue = storeOp.getMemref();
+        owner->erase();
+      }
+    }
+    mlir::Value originalIv = retrievePrivatizedIv(accLoopOp, privateValue);
+    assert(originalIv && "Expect induction variable to be found");
+    ivs.push_back(originalIv);
+  }
+  return ivs;
+}
+
+/// Expand every `acc.loop` of the function into an `acc.loop` wrapping a
+/// `fir.do_loop` nest.
+///
+/// For each `acc.loop` whose body has block arguments:
+///  - the stores of the block arguments are erased and the original induction
+///    variables are recovered from the private operands;
+///  - the block arguments are erased;
+///  - when the loop region has more than one block, only the induction range
+///    operands are cleared and no loop nest is created;
+///  - otherwise one `fir.do_loop` is created per former block argument, the
+///    `acc.loop` body is moved into the innermost one, the nest is moved into a
+///    fresh `acc.loop` block terminated by `acc.yield`, and the induction range
+///    operands are cleared.
+///
+/// An `acc.loop` whose body has no block argument is left untouched.
+void LoopExpand::runOnOperation() {
+  mlir::func::FuncOp func = getOperation();
+
+  mlir::ModuleOp mod = func->getParentOfType<mlir::ModuleOp>();
+  fir::KindMapping kindMap = fir::getKindMapping(mod);
+  fir::FirOpBuilder builder{mod, std::move(kindMap)};
+
+  func.walk([&](mlir::acc::LoopOp accLoopOp) {
+    mlir::Location loc = accLoopOp.getLoc();
+    mlir::Type idxTy = builder.getIndexType();
+
+    bool isStructured = accLoopOp.getLoopRegions().front()->hasOneBlock();
+    bool finalCountValue = isStructured;
+    unsigned nbLoop = accLoopOp.getBody().getNumArguments();
+
+    // Without block arguments there is no loop to create. Going further would
+    // index into an empty `loops` vector below.
+    if (nbLoop == 0)
+      return;
+
+    // Gather original (non-privatized) induction variables.
+    llvm::SmallVector<mlir::Value> ivs = getOriginalInductionVars(accLoopOp);
+
+    // Remove block arguments in order to create loop-nest and move current body
+    // in the newly created loop nest.
+    accLoopOp.getBody().eraseArguments(0, nbLoop);
+    builder.setInsertionPointAfter(accLoopOp);
+
+    // A multi-block region is not turned into a loop nest; only the induction
+    // range operands are dropped.
+    if (!isStructured) {
+      clearInductionRangesAndAttrs(builder, accLoopOp);
+      return;
+    }
+
+    llvm::SmallVector<mlir::Value> lbs, ubs, steps;
+    llvm::SmallVector<fir::DoLoopOp> loops;
+
+    // Create the loop nest, move the acc.loop body inside and move the loop
+    // nest inside the acc.loop again.
+    for (unsigned i = 0; i < nbLoop; ++i) {
+      bool isInnerLoop = i == (nbLoop - 1);
+
+      // Bounds and step are converted to index; the lower bound in its
+      // original type is passed as the iteration argument's initial value.
+      lbs.push_back(
+          builder.createConvert(loc, idxTy, accLoopOp.getLowerbound()[i]));
+      ubs.push_back(
+          builder.createConvert(loc, idxTy, accLoopOp.getUpperbound()[i]));
+      steps.push_back(
+          builder.createConvert(loc, idxTy, accLoopOp.getStep()[i]));
+      fir::DoLoopOp doLoopOp = builder.create<fir::DoLoopOp>(
+          loc, lbs[i], ubs[i], steps[i], /*unordered=*/false, finalCountValue,
+          mlir::ValueRange{accLoopOp.getLowerbound()[i]});
+      loops.push_back(doLoopOp);
+
+      if (isInnerLoop) {
+        // Move acc.loop body inside the newly created fir.do_loop.
+        accLoopOp.getBody().getTerminator()->erase();
+        doLoopOp.getRegion().takeBody(*accLoopOp.getLoopRegions().front());
+        // Recreate the block arguments.
+        doLoopOp.getBody()->addArgument(builder.getIndexType(), loc);
+        doLoopOp.getBody()->addArgument(accLoopOp.getLowerbound()[i].getType(),
+                                        loc);
+      } else {
+        // The next loop of the nest is created inside this one.
+        builder.setInsertionPointToStart(doLoopOp.getBody());
+      }
+    }
+
+    // Move the loop nest inside the acc.loop region.
+    mlir::Block *newAccLoopBlock =
+        builder.createBlock(accLoopOp.getLoopRegions().front());
+    loops[0].getOperation()->moveBefore(newAccLoopBlock,
+                                        newAccLoopBlock->end());
+
+    for (unsigned i = 0; i < nbLoop; ++i) {
+      // Store the iteration argument into the original induction variable at
+      // the start of the loop body.
+      builder.setInsertionPointToStart(loops[i].getBody());
+      builder.create<fir::StoreOp>(loc, loops[i].getBody()->getArgument(1),
+                                   ivs[i]);
+
+      builder.setInsertionPointToEnd(loops[i].getBody());
+      llvm::SmallVector<mlir::Value, 2> results;
+      if (finalCountValue)
+        results.push_back(builder.create<mlir::arith::AddIOp>(
+            loc, loops[i].getInductionVar(), loops[i].getStep()));
+
+      // Step loopVariable to help optimizations such as vectorization.
+      // Induction variable elimination will clean up as necessary.
+      mlir::Value convStep = builder.create<fir::ConvertOp>(
+          loc, accLoopOp.getStep()[i].getType(), loops[i].getStep());
+      mlir::Value loopVar = builder.create<fir::LoadOp>(loc, ivs[i]);
+      results.push_back(
+          builder.create<mlir::arith::AddIOp>(loc, loopVar, convStep));
+      builder.create<fir::ResultOp>(loc, results);
+
+      // Convert ops have been created outside of the acc.loop operation. They
+      // need to be moved back before their uses.
+      lbs[i].getDefiningOp()->moveBefore(loops[i].getOperation());
+      ubs[i].getDefiningOp()->moveBefore(loops[i].getOperation());
+      steps[i].getDefiningOp()->moveBefore(loops[i].getOperation());
+    }
+
+    // Terminate the new acc.loop block.
+    builder.setInsertionPointToEnd(newAccLoopBlock);
+    builder.create<mlir::acc::YieldOp>(loc);
+
+    clearInductionRangesAndAttrs(builder, accLoopOp);
+  });
+}
+
+/// Build a new instance of the `acc.loop` expansion pass.
+std::unique_ptr<mlir::Pass> fir::createOpenACCLoopExpandPass() {
+  auto pass = std::make_unique<LoopExpand>();
+  return pass;
+}
diff --git a/flang/test/Fir/OpenACC/loop-expand.f90 b/flang/test/Fir/OpenACC/loop-expand.f90
new file mode 100644
index 000000000000000..2efb2b2bd753355
--- /dev/null
+++ b/flang/test/Fir/OpenACC/loop-expand.f90
@@ -0,0 +1,118 @@
+! RUN: bbc -fopenacc -emit-hlfir %s -o - | fir-opt --split-input-file --acc-loop-expand | FileCheck %s
+
+subroutine singleloop(a)
+ ! Test input: one `acc loop` directive applied to a single do-loop.
+ real :: a(:)
+ integer :: i
+ a = 0.0
+
+ !$acc loop
+ do i = 1, 10
+ a(i) = i
+ end do
+end subroutine
+! CHECK-LABEL: func.func @_QPsingleloop
+! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsingleloopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private
+! CHECK: %[[LB0:.*]] = fir.convert %c1_i32 : (i32) -> index
+! CHECK: %[[UB0:.*]] = fir.convert %c10_i32 : (i32) -> index
+! CHECK: %[[STEP0:.*]] = fir.convert %c1_i32_0 : (i32) -> index
+! CHECK: %{{.*}} = fir.do_loop %[[ARG1:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] iter_args(%[[ARG2:.*]] = %{{.*}}) -> (index, i32) {
+! CHECK: fir.store %[[ARG2]] to %[[I]]#1 : !fir.ref<i32>
+! CHECK: %[[INCR1:.*]] = arith.addi %[[ARG1]], %[[STEP0]] : index
+! CHECK: %[[CONV_STEP:.*]] = fir.convert %[[STEP0]] : (index) -> i32
+! CHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#1 : !fir.ref<i32>
+! CHECK: %[[INCR2:.*]] = arith.addi %[[LOAD_I]], %[[CONV_STEP]] : i32
+! CHECK: fir.result %[[INCR1]], %[[INCR2]] : index, i32
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+
+subroutine single_loop_with_nest(a)
+ ! Test input: an `acc loop` directive without a collapse clause on the outer
+ ! loop of a two-level do-loop nest.
+ real :: a(:,:)
+ integer :: i, j
+ a = 0.0
+
+ !$acc loop
+ do i = 1, 10
+ do j = 1, 10
+ a(i, j) = i
+ end do
+ end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsingle_loop_with_nest
+! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsingle_loop_with_nestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private
+! CHECK: %[[LB0:.*]] = fir.convert %c1_i32 : (i32) -> index
+! CHECK: %[[UB0:.*]] = fir.convert %c10_i32 : (i32) -> index
+! CHECK: %[[STEP0:.*]] = fir.convert %c1_i32_0 : (i32) -> index
+! CHECK: %{{.*}} = fir.do_loop %[[ARG1:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] iter_args(%[[ARG2:.*]] = %{{.*}}) -> (index, i32) {
+! CHECK: fir.store %[[ARG2]] to %[[I]]#1 : !fir.ref<i32>
+! CHECK: fir.do_loop
+! CHECK: }
+! CHECK: %[[INCR1:.*]] = arith.addi %[[ARG1]], %[[STEP0]] : index
+! CHECK: %[[CONV_STEP:.*]] = fir.convert %[[STEP0]] : (index) -> i32
+! CHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#1 : !fir.ref<i32>
+! CHECK: %[[INCR2:.*]] = arith.addi %[[LOAD_I]], %[[CONV_STEP]] : i32
+! CHECK: fir.result %[[INCR1]], %[[INCR2]] : index, i32
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+
+subroutine loop_with_nest(a)
+ ! Test input: an `acc loop collapse(2)` directive on a two-level do-loop nest.
+ real :: a(:,:)
+ integer :: i, j
+ a = 0.0
+
+ !$acc loop collapse(2)
+ do i = 1, 10
+ do j = 1, 10
+ a(i, j) = i
+ end do
+ end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPloop_with_nest
+! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFloop_with_nestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[J:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFloop_with_nestEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>, @privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
+! CHECK: %[[LB0:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %[[UB0:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %[[STEP0:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %{{.*}}:2 = fir.do_loop %[[ARG1:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] iter_args(%[[ARG2:.*]] = %{{.*}}) -> (index, i32) {
+! CHECK: fir.store %[[ARG2]] to %[[I]]#1 : !fir.ref<i32>
+! CHECK: %[[LB1:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %[[UB1:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %[[STEP1:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %{{.*}}:2 = fir.do_loop %[[ARG3:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] iter_args(%[[ARG4:.*]] = %{{.*}}) -> (index, i32) {
+! CHECK: fir.store %[[ARG4]] to %[[J]]#1 : !fir.ref<i32>
+
+! CHECK: %[[INCR1:.*]] = arith.addi %[[ARG3]], %[[STEP1]] : index
+! CHECK: %[[CONV_STEP1:.*]] = fir.convert %[[STEP1]] : (index) -> i32
+! CHECK: %[[LOAD_J:.*]] = fir.load %[[J]]#1 : !fir.ref<i32>
+! CHECK: %[[INCR2:.*]] = arith.addi %[[LOAD_J]], %[[CONV_STEP1]] : i32
+! CHECK: fir.result %[[INCR1]], %[[INCR2]] : index, i32
+! CHECK: }
+! CHECK: %[[INCR1:.*]] = arith.addi %[[ARG1]], %[[STEP0]] : index
+! CHECK: %[[CONV_STEP0:.*]] = fir.convert %[[STEP0]] : (index) -> i32
+! CHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#1 : !fir.ref<i32>
+! CHECK: %[[INCR2:.*]] = arith.addi %[[LOAD_I]], %[[CONV_STEP0]] : i32
+! CHECK: fir.result %[[INCR1]], %[[INCR2]] : index, i32
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+
+subroutine loop_unstructured(a)
+ ! Test input: an `acc loop` directive on a do-loop whose body contains a
+ ! conditional `stop` statement.
+ real :: a(:)
+ integer :: i
+ a = 0.0
+
+ !$acc loop
+ do i = 1, 10
+ if (a(i) > 0.0) stop 'stop'
+ a(i) = i
+ end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPloop_unstructured
+! CHECK: acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>)
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index 391e77e0c4081a3..62ab100847619a2 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -1218,6 +1218,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
/// The i-th data operand passed.
Value getDataOperand(unsigned i);
+
+ /// Return the first block of the first loop region.
+ Block &getBody() { return getLoopRegions().front()->front(); }
}];
let hasCustomAssemblyFormat = 1;
@@ -1237,7 +1239,7 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
)
$region
( `(` type($results)^ `)` )?
- attr-dict-with-keyword
+ attr-dict-with-keyword
}];
let hasVerifier = 1;
More information about the flang-commits
mailing list