[flang-commits] [mlir] [flang] [flang][openacc] Add loop expand pass (PR #74045)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Tue Dec 5 10:52:06 PST 2023
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/74045
>From e853a3e4cbabc6358ec9a990c9ac3437aa43ab89 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 17 Nov 2023 23:08:22 -0800
Subject: [PATCH 1/3] [flang][openacc] Add loop expand pass
---
.../flang/Optimizer/Transforms/Passes.h | 3 +
.../flang/Optimizer/Transforms/Passes.td | 8 +
flang/lib/Optimizer/Transforms/CMakeLists.txt | 1 +
.../Transforms/OpenACCLoopExpand.cpp | 170 ++++++++++++++++++
flang/test/Fir/OpenACC/loop-expand.f90 | 118 ++++++++++++
.../mlir/Dialect/OpenACC/OpenACCOps.td | 4 +-
6 files changed, 303 insertions(+), 1 deletion(-)
create mode 100644 flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp
create mode 100644 flang/test/Fir/OpenACC/loop-expand.f90
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index 92bc7246eca70..6320690a785a8 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -45,6 +45,7 @@ namespace fir {
#define GEN_PASS_DECL_ALGEBRAICSIMPLIFICATION
#define GEN_PASS_DECL_POLYMORPHICOPCONVERSION
#define GEN_PASS_DECL_OPENACCDATAOPERANDCONVERSION
+#define GEN_PASS_DECL_OPENACCLOOPEXPAND
#include "flang/Optimizer/Transforms/Passes.h.inc"
std::unique_ptr<mlir::Pass> createAbstractResultOnFuncOptPass();
@@ -79,6 +80,8 @@ std::unique_ptr<mlir::Pass> createOMPFunctionFilteringPass();
std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
createOMPMarkDeclareTargetPass();
+std::unique_ptr<mlir::Pass> createOpenACCLoopExpandPass();
+
std::unique_ptr<mlir::Pass> createVScaleAttrPass();
std::unique_ptr<mlir::Pass>
createVScaleAttrPass(std::pair<unsigned, unsigned> vscaleAttr);
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index c3768fd2d689c..c9f707ba084cb 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -335,6 +335,14 @@ def OMPFunctionFiltering : Pass<"omp-function-filtering"> {
];
}
+def OpenACCLoopExpand : Pass<"acc-loop-expand", "mlir::func::FuncOp"> {
+ let summary = "";
+ let constructor = "::fir::createOpenACCLoopExpandPass()";
+ let dependentDialects = [
+ "fir::FIROpsDialect"
+ ];
+}
+
def VScaleAttr : Pass<"vscale-attr", "mlir::func::FuncOp"> {
let summary = "Add vscale_range attribute to functions";
let description = [{
diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index 03b67104a93b5..03303ee14d917 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -19,6 +19,7 @@ add_flang_library(FIRTransforms
LoopVersioning.cpp
OMPFunctionFiltering.cpp
OMPMarkDeclareTarget.cpp
+ OpenACCLoopExpand.cpp
VScaleAttr.cpp
DEPENDS
diff --git a/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp b/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp
new file mode 100644
index 0000000000000..3d712c9ea8d7a
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp
@@ -0,0 +1,170 @@
+//===- OpenACCLoopExpand.cpp - expand acc.loop operand to fir.do_loop nest ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/Support/FIRContext.h"
+#include "flang/Optimizer/Dialect/Support/KindMapping.h"
+#include "flang/Optimizer/Transforms/Passes.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/DialectConversion.h"
+
+namespace fir {
+#define GEN_PASS_DEF_OPENACCLOOPEXPAND
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+class LoopExpand : public fir::impl::OpenACCLoopExpandBase<LoopExpand> {
+public:
+ void runOnOperation() override;
+};
+
+static mlir::Value retrievePrivatizedIv(mlir::acc::LoopOp &op,
+ mlir::Value value) {
+ for (auto p : op.getPrivateOperands()) {
+ if (p == value) {
+ auto privateOp = mlir::cast<mlir::acc::PrivateOp>(p.getDefiningOp());
+ return privateOp.getVarPtr();
+ }
+ }
+ return mlir::Value{};
+}
+
+/// Reset operands and operand segments for the induction ranges.
+static void clearInductionRangesAndAttrs(fir::FirOpBuilder &builder,
+ mlir::acc::LoopOp &accLoopOp) {
+ // Remove the ranges.
+ accLoopOp.getLowerboundMutable().clear();
+ accLoopOp.getUpperboundMutable().clear();
+ accLoopOp.getStepMutable().clear();
+}
+
+static llvm::SmallVector<mlir::Value>
+getOriginalInductionVars(mlir::acc::LoopOp &accLoopOp) {
+ llvm::SmallVector<mlir::Value> ivs;
+ for (auto arg : accLoopOp.getBody().getArguments()) {
+ mlir::Value privateValue;
+ for (mlir::OpOperand &u : arg.getUses()) {
+ mlir::Operation *owner = u.getOwner();
+ if (auto storeOp = mlir::dyn_cast<fir::StoreOp>(owner)) {
+ privateValue = storeOp.getMemref();
+ owner->erase();
+ }
+ }
+ mlir::Value originalIv = retrievePrivatizedIv(accLoopOp, privateValue);
+ assert(originalIv && "Expect induction variable to be found");
+ ivs.push_back(originalIv);
+ }
+ return ivs;
+}
+
+void LoopExpand::runOnOperation() {
+ mlir::func::FuncOp func = getOperation();
+
+ mlir::ModuleOp mod = func->getParentOfType<mlir::ModuleOp>();
+ fir::KindMapping kindMap = fir::getKindMapping(mod);
+ fir::FirOpBuilder builder{mod, std::move(kindMap)};
+
+ func.walk([&](mlir::acc::LoopOp accLoopOp) {
+ mlir::Location loc = accLoopOp.getLoc();
+ mlir::Type idxTy = builder.getIndexType();
+
+ bool isStructured = accLoopOp.getLoopRegions().front()->hasOneBlock();
+ bool finalCountValue = isStructured;
+ unsigned nbLoop = accLoopOp.getBody().getNumArguments();
+
+ // Gather original (non-privatized) induction variables.
+ llvm::SmallVector<mlir::Value> ivs = getOriginalInductionVars(accLoopOp);
+
+ // Remove block arguments in order to create loop-nest and move current body
+ // in the newly created loop nest.
+ accLoopOp.getBody().eraseArguments(0, nbLoop);
+ builder.setInsertionPointAfter(accLoopOp);
+
+ if (!isStructured) {
+ clearInductionRangesAndAttrs(builder, accLoopOp);
+ return;
+ }
+
+ llvm::SmallVector<mlir::Value> lbs, ubs, steps;
+ llvm::SmallVector<fir::DoLoopOp> loops;
+
+ // Create the loop nest, move the acc.loop body inside and move the loop
+ // nest inside the acc.loop again.
+ for (unsigned i = 0; i < nbLoop; ++i) {
+ bool isInnerLoop = i == (nbLoop - 1);
+
+ lbs.push_back(
+ builder.createConvert(loc, idxTy, accLoopOp.getLowerbound()[i]));
+ ubs.push_back(
+ builder.createConvert(loc, idxTy, accLoopOp.getUpperbound()[i]));
+ steps.push_back(
+ builder.createConvert(loc, idxTy, accLoopOp.getStep()[i]));
+ fir::DoLoopOp doLoopOp = builder.create<fir::DoLoopOp>(
+ loc, lbs[i], ubs[i], steps[i], /*unordered=*/false, finalCountValue,
+ mlir::ValueRange{accLoopOp.getLowerbound()[i]});
+ loops.push_back(doLoopOp);
+
+ if (isInnerLoop) {
+ // Move acc.loop body inside the newly created fir.do_loop.
+ accLoopOp.getBody().getTerminator()->erase();
+ doLoopOp.getRegion().takeBody(*accLoopOp.getLoopRegions().front());
+ // Recreate the block arguments.
+ doLoopOp.getBody()->addArgument(builder.getIndexType(), loc);
+ doLoopOp.getBody()->addArgument(accLoopOp.getLowerbound()[i].getType(),
+ loc);
+ } else {
+ builder.setInsertionPointToStart(doLoopOp.getBody());
+ }
+ }
+
+ // Move the loop nest inside the acc.loop region.
+ mlir::Block *newAccLoopBlock =
+ builder.createBlock(accLoopOp.getLoopRegions().front());
+ loops[0].getOperation()->moveBefore(newAccLoopBlock,
+ newAccLoopBlock->end());
+
+ for (unsigned i = 0; i < nbLoop; ++i) {
+ builder.setInsertionPointToStart(loops[i].getBody());
+ builder.create<fir::StoreOp>(loc, loops[i].getBody()->getArgument(1),
+ ivs[i]);
+
+ builder.setInsertionPointToEnd(loops[i].getBody());
+ llvm::SmallVector<mlir::Value, 2> results;
+ if (finalCountValue)
+ results.push_back(builder.create<mlir::arith::AddIOp>(
+ loc, loops[i].getInductionVar(), loops[i].getStep()));
+
+ // Step loopVariable to help optimizations such as vectorization.
+ // Induction variable elimination will clean up as necessary.
+ mlir::Value convStep = builder.create<fir::ConvertOp>(
+ loc, accLoopOp.getStep()[i].getType(), loops[i].getStep());
+ mlir::Value loopVar = builder.create<fir::LoadOp>(loc, ivs[i]);
+ results.push_back(
+ builder.create<mlir::arith::AddIOp>(loc, loopVar, convStep));
+ builder.create<fir::ResultOp>(loc, results);
+
+ // Convert ops have been created outside of the acc.loop operation. They
+ // need to be moved back before their uses.
+ lbs[i].getDefiningOp()->moveBefore(loops[i].getOperation());
+ ubs[i].getDefiningOp()->moveBefore(loops[i].getOperation());
+ steps[i].getDefiningOp()->moveBefore(loops[i].getOperation());
+ }
+
+ builder.setInsertionPointToEnd(newAccLoopBlock);
+ builder.create<mlir::acc::YieldOp>(loc);
+
+ clearInductionRangesAndAttrs(builder, accLoopOp);
+ });
+}
+
+std::unique_ptr<mlir::Pass> fir::createOpenACCLoopExpandPass() {
+ return std::make_unique<LoopExpand>();
+}
diff --git a/flang/test/Fir/OpenACC/loop-expand.f90 b/flang/test/Fir/OpenACC/loop-expand.f90
new file mode 100644
index 0000000000000..2efb2b2bd7533
--- /dev/null
+++ b/flang/test/Fir/OpenACC/loop-expand.f90
@@ -0,0 +1,118 @@
+! RUN: bbc -fopenacc -emit-hlfir %s -o - | fir-opt --split-input-file --acc-loop-expand | FileCheck %s
+
+subroutine singleloop(a)
+ real :: a(:)
+ integer :: i
+ a = 0.0
+
+ !$acc loop
+ do i = 1, 10
+ a(i) = i
+ end do
+end subroutine
+! CHECK-LABEL: func.func @_QPsingleloop
+! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsingleloopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private
+! CHECK: %[[LB0:.*]] = fir.convert %c1_i32 : (i32) -> index
+! CHECK: %[[UB0:.*]] = fir.convert %c10_i32 : (i32) -> index
+! CHECK: %[[STEP0:.*]] = fir.convert %c1_i32_0 : (i32) -> index
+! CHECK: %{{.*}} = fir.do_loop %[[ARG1:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] iter_args(%[[ARG2:.*]] = %{{.*}}) -> (index, i32) {
+! CHECK: fir.store %[[ARG2]] to %2#1 : !fir.ref<i32>
+! CHECK: %[[INCR1:.*]] = arith.addi %[[ARG1]], %[[STEP0]] : index
+! CHECK: %[[CONV_STEP:.*]] = fir.convert %[[STEP0]] : (index) -> i32
+! CHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#1 : !fir.ref<i32>
+! CHECK: %[[INCR2:.*]] = arith.addi %[[LOAD_I]], %[[CONV_STEP]] : i32
+! CHECK: fir.result %[[INCR1]], %[[INCR2]] : index, i32
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+
+subroutine single_loop_with_nest(a)
+ real :: a(:,:)
+ integer :: i, j
+ a = 0.0
+
+ !$acc loop
+ do i = 1, 10
+ do j = 1, 10
+ a(i, j) = i
+ end do
+ end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsingle_loop_with_nest
+! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsingle_loop_with_nestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private
+! CHECK: %[[LB0:.*]] = fir.convert %c1_i32 : (i32) -> index
+! CHECK: %[[UB0:.*]] = fir.convert %c10_i32 : (i32) -> index
+! CHECK: %[[STEP0:.*]] = fir.convert %c1_i32_0 : (i32) -> index
+! CHECK: %{{.*}} = fir.do_loop %[[ARG1:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] iter_args(%[[ARG2:.*]] = %{{.*}}) -> (index, i32) {
+! CHECK: fir.store %[[ARG2]] to %2#1 : !fir.ref<i32>
+! CHECK: fir.do_loop
+! CHECK: }
+! CHECK: %[[INCR1:.*]] = arith.addi %[[ARG1]], %[[STEP0]] : index
+! CHECK: %[[CONV_STEP:.*]] = fir.convert %[[STEP0]] : (index) -> i32
+! CHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#1 : !fir.ref<i32>
+! CHECK: %[[INCR2:.*]] = arith.addi %[[LOAD_I]], %[[CONV_STEP]] : i32
+! CHECK: fir.result %[[INCR1]], %[[INCR2]] : index, i32
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+
+subroutine loop_with_nest(a)
+ real :: a(:,:)
+ integer :: i, j
+ a = 0.0
+
+ !$acc loop collapse(2)
+ do i = 1, 10
+ do j = 1, 10
+ a(i, j) = i
+ end do
+ end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPloop_with_nest
+! CHECK: %[[I:.*]]:2 = hlfir.declare %1 {uniq_name = "_QFloop_with_nestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[J:.*]]:2 = hlfir.declare %3 {uniq_name = "_QFloop_with_nestEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>, @privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
+! CHECK: %[[LB0:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %[[UB0:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %[[STEP0:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %{{.*}}:2 = fir.do_loop %[[ARG1:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] iter_args(%[[ARG2:.*]] = %{{.*}}) -> (index, i32) {
+! CHECK: fir.store %[[ARG2]] to %[[I]]#1 : !fir.ref<i32>
+! CHECK: %[[LB1:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %[[UB1:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %[[STEP1:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %{{.*}}:2 = fir.do_loop %[[ARG3:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] iter_args(%[[ARG4:.*]] = %{{.*}}) -> (index, i32) {
+! CHECK: fir.store %[[ARG4]] to %[[J]]#1 : !fir.ref<i32>
+
+! CHECK: %[[INCR1:.*]] = arith.addi %[[ARG3]], %[[STEP1]] : index
+! CHECK: %[[CONV_STEP1:.*]] = fir.convert %[[STEP1]] : (index) -> i32
+! CHECK: %[[LOAD_J:.*]] = fir.load %[[J]]#1 : !fir.ref<i32>
+! CHECK: %[[INCR2:.*]] = arith.addi %[[LOAD_J]], %[[CONV_STEP1]] : i32
+! CHECK: fir.result %[[INCR1]], %[[INCR2]] : index, i32
+! CHECK: }
+! CHECK: %[[INCR1:.*]] = arith.addi %[[ARG1]], %[[STEP0]] : index
+! CHECK: %[[CONV_STEP0:.*]] = fir.convert %[[STEP0]] : (index) -> i32
+! CHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#1 : !fir.ref<i32>
+! CHECK: %[[INCR2:.*]] = arith.addi %[[LOAD_I]], %18 : i32
+! CHECK: fir.result %[[INCR1]], %[[INCR2]] : index, i32
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+
+subroutine loop_unstructured(a)
+ real :: a(:)
+ integer :: i
+ a = 0.0
+
+ !$acc loop
+ do i = 1, 10
+ if (a(i) > 0.0) stop 'stop'
+ a(i) = i
+ end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPloop_unstructured
+! CHECK: acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>)
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index 391e77e0c4081..62ab100847619 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -1218,6 +1218,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
/// The i-th data operand passed.
Value getDataOperand(unsigned i);
+
+ Block &getBody() { return getLoopRegions().front()->front(); }
}];
let hasCustomAssemblyFormat = 1;
@@ -1237,7 +1239,7 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
)
$region
( `(` type($results)^ `)` )?
- attr-dict-with-keyword
+ attr-dict-with-keyword
}];
let hasVerifier = 1;
>From 7dbf9f61e504d97c82dc99d5f58ae3917d17ce77 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 1 Dec 2023 15:15:44 -0800
Subject: [PATCH 2/3] Fix iteration arg type
---
.../Transforms/OpenACCLoopExpand.cpp | 23 ++++++----
flang/test/Fir/OpenACC/loop-expand.f90 | 46 ++++++++++++++++---
2 files changed, 53 insertions(+), 16 deletions(-)
diff --git a/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp b/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp
index 3d712c9ea8d7a..f345fa0bef0c3 100644
--- a/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp
+++ b/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp
@@ -93,7 +93,7 @@ void LoopExpand::runOnOperation() {
return;
}
- llvm::SmallVector<mlir::Value> lbs, ubs, steps;
+ llvm::SmallVector<mlir::Value> lbs, ubs, steps, iterArgs;
llvm::SmallVector<fir::DoLoopOp> loops;
// Create the loop nest, move the acc.loop body inside and move the loop
@@ -107,9 +107,10 @@ void LoopExpand::runOnOperation() {
builder.createConvert(loc, idxTy, accLoopOp.getUpperbound()[i]));
steps.push_back(
builder.createConvert(loc, idxTy, accLoopOp.getStep()[i]));
+ iterArgs.push_back(builder.createConvert(loc, fir::unwrapRefType(ivs[i].getType()), accLoopOp.getLowerbound()[i]));
fir::DoLoopOp doLoopOp = builder.create<fir::DoLoopOp>(
loc, lbs[i], ubs[i], steps[i], /*unordered=*/false, finalCountValue,
- mlir::ValueRange{accLoopOp.getLowerbound()[i]});
+ mlir::ValueRange{iterArgs[i]});
loops.push_back(doLoopOp);
if (isInnerLoop) {
@@ -118,8 +119,7 @@ void LoopExpand::runOnOperation() {
doLoopOp.getRegion().takeBody(*accLoopOp.getLoopRegions().front());
// Recreate the block arguments.
doLoopOp.getBody()->addArgument(builder.getIndexType(), loc);
- doLoopOp.getBody()->addArgument(accLoopOp.getLowerbound()[i].getType(),
- loc);
+ doLoopOp.getBody()->addArgument(iterArgs[i].getType(), loc);
} else {
builder.setInsertionPointToStart(doLoopOp.getBody());
}
@@ -144,18 +144,23 @@ void LoopExpand::runOnOperation() {
// Step loopVariable to help optimizations such as vectorization.
// Induction variable elimination will clean up as necessary.
- mlir::Value convStep = builder.create<fir::ConvertOp>(
- loc, accLoopOp.getStep()[i].getType(), loops[i].getStep());
mlir::Value loopVar = builder.create<fir::LoadOp>(loc, ivs[i]);
+ mlir::Value convStep = builder.create<fir::ConvertOp>(
+ loc, loopVar.getType(), loops[i].getStep());
results.push_back(
builder.create<mlir::arith::AddIOp>(loc, loopVar, convStep));
builder.create<fir::ResultOp>(loc, results);
// Convert ops have been created outside of the acc.loop operation. They
// need to be moved back before their uses.
- lbs[i].getDefiningOp()->moveBefore(loops[i].getOperation());
- ubs[i].getDefiningOp()->moveBefore(loops[i].getOperation());
- steps[i].getDefiningOp()->moveBefore(loops[i].getOperation());
+ if (mlir::isa<fir::ConvertOp>(lbs[i].getDefiningOp()))
+ lbs[i].getDefiningOp()->moveBefore(loops[i].getOperation());
+ if (mlir::isa<fir::ConvertOp>(ubs[i].getDefiningOp()))
+ ubs[i].getDefiningOp()->moveBefore(loops[i].getOperation());
+ if (mlir::isa<fir::ConvertOp>(steps[i].getDefiningOp()))
+ steps[i].getDefiningOp()->moveBefore(loops[i].getOperation());
+ if (mlir::isa<fir::ConvertOp>(iterArgs[i].getDefiningOp()))
+ iterArgs[i].getDefiningOp()->moveBefore(loops[i].getOperation());
}
builder.setInsertionPointToEnd(newAccLoopBlock);
diff --git a/flang/test/Fir/OpenACC/loop-expand.f90 b/flang/test/Fir/OpenACC/loop-expand.f90
index 2efb2b2bd7533..ab4bca22f7c87 100644
--- a/flang/test/Fir/OpenACC/loop-expand.f90
+++ b/flang/test/Fir/OpenACC/loop-expand.f90
@@ -19,8 +19,8 @@ subroutine singleloop(a)
! CHECK: %{{.*}} = fir.do_loop %[[ARG1:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] iter_args(%[[ARG2:.*]] = %{{.*}}) -> (index, i32) {
! CHECK: fir.store %[[ARG2]] to %2#1 : !fir.ref<i32>
! CHECK: %[[INCR1:.*]] = arith.addi %[[ARG1]], %[[STEP0]] : index
-! CHECK: %[[CONV_STEP:.*]] = fir.convert %[[STEP0]] : (index) -> i32
! CHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#1 : !fir.ref<i32>
+! CHECK: %[[CONV_STEP:.*]] = fir.convert %[[STEP0]] : (index) -> i32
! CHECK: %[[INCR2:.*]] = arith.addi %[[LOAD_I]], %[[CONV_STEP]] : i32
! CHECK: fir.result %[[INCR1]], %[[INCR2]] : index, i32
! CHECK: }
@@ -51,8 +51,8 @@ subroutine single_loop_with_nest(a)
! CHECK: fir.do_loop
! CHECK: }
! CHECK: %[[INCR1:.*]] = arith.addi %[[ARG1]], %[[STEP0]] : index
-! CHECK: %[[CONV_STEP:.*]] = fir.convert %[[STEP0]] : (index) -> i32
! CHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#1 : !fir.ref<i32>
+! CHECK: %[[CONV_STEP:.*]] = fir.convert %[[STEP0]] : (index) -> i32
! CHECK: %[[INCR2:.*]] = arith.addi %[[LOAD_I]], %[[CONV_STEP]] : i32
! CHECK: fir.result %[[INCR1]], %[[INCR2]] : index, i32
! CHECK: }
@@ -73,8 +73,8 @@ subroutine loop_with_nest(a)
end subroutine
! CHECK-LABEL: func.func @_QPloop_with_nest
-! CHECK: %[[I:.*]]:2 = hlfir.declare %1 {uniq_name = "_QFloop_with_nestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: %[[J:.*]]:2 = hlfir.declare %3 {uniq_name = "_QFloop_with_nestEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFloop_with_nestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[J:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFloop_with_nestEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>, @privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
! CHECK: %[[LB0:.*]] = fir.convert %{{.*}} : (i32) -> index
! CHECK: %[[UB0:.*]] = fir.convert %{{.*}} : (i32) -> index
@@ -88,15 +88,15 @@ subroutine loop_with_nest(a)
! CHECK: fir.store %[[ARG4]] to %[[J]]#1 : !fir.ref<i32>
! CHECK: %[[INCR1:.*]] = arith.addi %[[ARG3]], %[[STEP1]] : index
-! CHECK: %[[CONV_STEP1:.*]] = fir.convert %[[STEP1]] : (index) -> i32
! CHECK: %[[LOAD_J:.*]] = fir.load %[[J]]#1 : !fir.ref<i32>
+! CHECK: %[[CONV_STEP1:.*]] = fir.convert %[[STEP1]] : (index) -> i32
! CHECK: %[[INCR2:.*]] = arith.addi %[[LOAD_J]], %[[CONV_STEP1]] : i32
! CHECK: fir.result %[[INCR1]], %[[INCR2]] : index, i32
! CHECK: }
! CHECK: %[[INCR1:.*]] = arith.addi %[[ARG1]], %[[STEP0]] : index
-! CHECK: %[[CONV_STEP0:.*]] = fir.convert %[[STEP0]] : (index) -> i32
! CHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#1 : !fir.ref<i32>
-! CHECK: %[[INCR2:.*]] = arith.addi %[[LOAD_I]], %18 : i32
+! CHECK: %[[CONV_STEP0:.*]] = fir.convert %[[STEP0]] : (index) -> i32
+! CHECK: %[[INCR2:.*]] = arith.addi %[[LOAD_I]], %[[CONV_STEP0]] : i32
! CHECK: fir.result %[[INCR1]], %[[INCR2]] : index, i32
! CHECK: }
! CHECK: acc.yield
@@ -116,3 +116,35 @@ subroutine loop_unstructured(a)
! CHECK-LABEL: func.func @_QPloop_unstructured
! CHECK: acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>)
+
+subroutine loop_iv_8()
+ integer(4), parameter :: N = 10
+ integer(8) :: ii
+ real(4) :: array(N)
+
+ !$acc parallel loop
+ do ii = 1, N
+ array(ii) = ii
+ end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPloop_iv_8()
+
+! CHECK: %[[II:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFloop_iv_8Eii"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK: acc.loop
+! CHECK: %[[LB0:.*]] = fir.convert %c1_i32 : (i32) -> index
+! CHECK: %[[UB0:.*]] = fir.convert %c10_i32 : (i32) -> index
+! CHECK: %[[STEP0:.*]] = fir.convert %c1_i32_0 : (i32) -> index
+! CHECK: %[[ITER_ARG:.*]] = fir.convert %c1_i32 : (i32) -> i64
+! CHECK: %{{.*}}:2 = fir.do_loop %[[ARG0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] iter_args(%[[ARG1:.*]] = %[[ITER_ARG]]) -> (index, i64) {
+! CHECK: fir.store %[[ARG1]] to %[[II]]#1 : !fir.ref<i64>
+
+! CHECK: %[[INCR1:.*]] = arith.addi %[[ARG0]], %[[STEP0]] : index
+! CHECK: %[[LOAD_II:.*]] = fir.load %[[II]]#1 : !fir.ref<i64>
+! CHECK: %[[CONV_STEP0:.*]] = fir.convert %[[STEP0]] : (index) -> i64
+! CHECK: %[[INCR2:.*]] = arith.addi %[[LOAD_II]], %[[CONV_STEP0]] : i64
+! CHECK: fir.result %[[INCR1]], %[[INCR2]] : index, i64
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+
\ No newline at end of file
>From 2e9d7952485b572830be79f9cab42f7f35aeafbd Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 5 Dec 2023 10:48:38 -0800
Subject: [PATCH 3/3] Clean up privatization list
---
.../Transforms/OpenACCLoopExpand.cpp | 57 +++++++++++++++++++
flang/test/Fir/OpenACC/loop-expand.f90 | 10 ++--
2 files changed, 62 insertions(+), 5 deletions(-)
diff --git a/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp b/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp
index f345fa0bef0c3..0a9f96bac24fe 100644
--- a/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp
+++ b/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp
@@ -65,6 +65,60 @@ getOriginalInductionVars(mlir::acc::LoopOp &accLoopOp) {
return ivs;
}
+static unsigned getOperandPosition(const mlir::OperandRange &operands,
+ mlir::Value val) {
+ unsigned pos = 0;
+ for (auto v : operands) {
+ if (v == val)
+ return pos;
+ ++pos;
+ }
+ return UINT_MAX;
+}
+
+static void clearIVPrivatizations(llvm::SmallVector<mlir::Value> &ivs,
+ mlir::acc::LoopOp &accLoopOp,
+ mlir::MLIRContext *context) {
+ // Collect all of the private operations associated with IVs.
+ llvm::SmallVector<mlir::Value> privOps;
+ for (auto priv : accLoopOp.getPrivateOperands()) {
+ mlir::Value varPtr{mlir::acc::getVarPtr(priv.getDefiningOp())};
+ assert(varPtr && "must be able to extract varPtr from acc private op");
+ if (llvm::find(ivs, varPtr) != ivs.end()) {
+ privOps.push_back(priv);
+ }
+ }
+
+ // Next remove the private operations associated with IVs.
+ for (auto priv : privOps) {
+ llvm::errs() << priv << "\n";
+ mlir::Value varPtr{mlir::acc::getVarPtr(priv.getDefiningOp())};
+ auto pos = getOperandPosition(accLoopOp.getPrivateOperands(), priv);
+
+ // 1) Replace all uses of the private var with the varPtr.
+ priv.replaceAllUsesWith(varPtr);
+
+ // 2) Manually update the loop op: erase the private operand and drop its matching privatization recipe.
+ accLoopOp.getPrivateOperandsMutable().erase(pos);
+ std::vector<mlir::Attribute> updatedRecipesList;
+ for (auto [index, attribute] :
+ llvm::enumerate(accLoopOp.getPrivatizationsAttr())) {
+ if (index != pos) {
+ updatedRecipesList.push_back(attribute);
+ }
+ }
+ if (updatedRecipesList.empty()) {
+ accLoopOp.removePrivatizationsAttr();
+ } else {
+ accLoopOp.setPrivatizationsAttr(
+ mlir::ArrayAttr::get(context, updatedRecipesList));
+ }
+
+ // 3) Now remove the private op.
+ priv.getDefiningOp()->erase();
+ }
+}
+
void LoopExpand::runOnOperation() {
mlir::func::FuncOp func = getOperation();
@@ -83,6 +137,9 @@ void LoopExpand::runOnOperation() {
// Gather original (non-privatized) induction variables.
llvm::SmallVector<mlir::Value> ivs = getOriginalInductionVars(accLoopOp);
+ // Clear the privatization list from privatized IVs.
+ clearIVPrivatizations(ivs, accLoopOp, &getContext());
+
// Remove block arguments in order to create loop-nest and move current body
// in the newly created loop nest.
accLoopOp.getBody().eraseArguments(0, nbLoop);
diff --git a/flang/test/Fir/OpenACC/loop-expand.f90 b/flang/test/Fir/OpenACC/loop-expand.f90
index ab4bca22f7c87..461271daceec7 100644
--- a/flang/test/Fir/OpenACC/loop-expand.f90
+++ b/flang/test/Fir/OpenACC/loop-expand.f90
@@ -12,7 +12,7 @@ subroutine singleloop(a)
end subroutine
! CHECK-LABEL: func.func @_QPsingleloop
! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsingleloopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: acc.loop private
+! CHECK: acc.loop {
! CHECK: %[[LB0:.*]] = fir.convert %c1_i32 : (i32) -> index
! CHECK: %[[UB0:.*]] = fir.convert %c10_i32 : (i32) -> index
! CHECK: %[[STEP0:.*]] = fir.convert %c1_i32_0 : (i32) -> index
@@ -42,7 +42,7 @@ subroutine single_loop_with_nest(a)
! CHECK-LABEL: func.func @_QPsingle_loop_with_nest
! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsingle_loop_with_nestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: acc.loop private
+! CHECK: acc.loop {
! CHECK: %[[LB0:.*]] = fir.convert %c1_i32 : (i32) -> index
! CHECK: %[[UB0:.*]] = fir.convert %c10_i32 : (i32) -> index
! CHECK: %[[STEP0:.*]] = fir.convert %c1_i32_0 : (i32) -> index
@@ -75,7 +75,7 @@ subroutine loop_with_nest(a)
! CHECK-LABEL: func.func @_QPloop_with_nest
! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFloop_with_nestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[J:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFloop_with_nestEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>, @privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
+! CHECK: acc.loop {
! CHECK: %[[LB0:.*]] = fir.convert %{{.*}} : (i32) -> index
! CHECK: %[[UB0:.*]] = fir.convert %{{.*}} : (i32) -> index
! CHECK: %[[STEP0:.*]] = fir.convert %{{.*}} : (i32) -> index
@@ -115,7 +115,7 @@ subroutine loop_unstructured(a)
end subroutine
! CHECK-LABEL: func.func @_QPloop_unstructured
-! CHECK: acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>)
+! CHECK: acc.loop {
subroutine loop_iv_8()
integer(4), parameter :: N = 10
@@ -131,7 +131,7 @@ subroutine loop_iv_8()
! CHECK-LABEL: func.func @_QPloop_iv_8()
! CHECK: %[[II:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFloop_iv_8Eii"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
-! CHECK: acc.loop
+! CHECK: acc.loop {
! CHECK: %[[LB0:.*]] = fir.convert %c1_i32 : (i32) -> index
! CHECK: %[[UB0:.*]] = fir.convert %c10_i32 : (i32) -> index
! CHECK: %[[STEP0:.*]] = fir.convert %c1_i32_0 : (i32) -> index
More information about the flang-commits
mailing list