[llvm-branch-commits] [flang] [flang][OpenMP] Extend `do concurrent` mapping to device (PR #155987)
Kareem Ergawy via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun Aug 31 23:26:40 PDT 2025
https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/155987
>From 0373863b919e59130dcf57593f4283ece0dff12a Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Fri, 29 Aug 2025 02:04:49 -0500
Subject: [PATCH] [flang][OpenMP] Extend `do concurrent` mapping to device
Upstreams further parts of `do concurrent` to OpenMP conversion pass
from AMD's fork. This PR extends the pass by adding support for mapping
to the device.
---
flang/lib/Optimizer/OpenMP/CMakeLists.txt | 1 +
.../OpenMP/DoConcurrentConversion.cpp | 400 +++++++++++++++++-
.../Transforms/DoConcurrent/basic_device.f90 | 83 ++++
.../Transforms/DoConcurrent/basic_device.mlir | 10 +-
4 files changed, 476 insertions(+), 18 deletions(-)
create mode 100644 flang/test/Transforms/DoConcurrent/basic_device.f90
diff --git a/flang/lib/Optimizer/OpenMP/CMakeLists.txt b/flang/lib/Optimizer/OpenMP/CMakeLists.txt
index e0aebd0714c8f..b85ee7e861a4f 100644
--- a/flang/lib/Optimizer/OpenMP/CMakeLists.txt
+++ b/flang/lib/Optimizer/OpenMP/CMakeLists.txt
@@ -26,6 +26,7 @@ add_flang_library(FlangOpenMPTransforms
FIRSupport
FortranSupport
HLFIRDialect
+ FortranUtils
MLIR_DEPS
${dialect_libs}
diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
index c928b76065ade..e975b86a6ba0d 100644
--- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
+++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
@@ -6,17 +6,22 @@
//
//===----------------------------------------------------------------------===//
+#include "flang/Optimizer/Builder/DirectivesCommon.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/HLFIRTools.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/OpenMP/Passes.h"
#include "flang/Optimizer/OpenMP/Utils.h"
#include "flang/Support/OpenMP-utils.h"
+#include "flang/Utils/OpenMP.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/RegionUtils.h"
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
namespace flangomp {
#define GEN_PASS_DEF_DOCONCURRENTCONVERSIONPASS
@@ -107,6 +112,33 @@ struct InductionVariableInfo {
using InductionVariableInfos = llvm::SmallVector<InductionVariableInfo>;
+/// Collect the list of values used inside the loop but defined outside of it.
+void collectLoopLiveIns(fir::DoConcurrentLoopOp loop,
+ llvm::SmallVectorImpl<mlir::Value> &liveIns) {
+ llvm::SmallDenseSet<mlir::Value> seenValues;
+ llvm::SmallDenseSet<mlir::Operation *> seenOps;
+
+ for (auto [lb, ub, st] : llvm::zip_equal(
+ loop.getLowerBound(), loop.getUpperBound(), loop.getStep())) {
+ liveIns.push_back(lb);
+ liveIns.push_back(ub);
+ liveIns.push_back(st);
+ }
+
+ mlir::visitUsedValuesDefinedAbove(
+ loop.getRegion(), [&](mlir::OpOperand *operand) {
+ if (!seenValues.insert(operand->get()).second)
+ return;
+
+ mlir::Operation *definingOp = operand->get().getDefiningOp();
+ // We want to collect ops corresponding to live-ins only once.
+ if (definingOp && !seenOps.insert(definingOp).second)
+ return;
+
+ liveIns.push_back(operand->get());
+ });
+}
+
/// Collects values that are local to a loop: "loop-local values". A loop-local
/// value is one that is used exclusively inside the loop but allocated outside
/// of it. This usually corresponds to temporary values that are used inside the
@@ -182,10 +214,6 @@ class DoConcurrentConversion
mlir::LogicalResult
matchAndRewrite(fir::DoConcurrentOp doLoop, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const override {
- if (mapToDevice)
- return doLoop.emitError(
- "not yet implemented: Mapping `do concurrent` loops to device");
-
looputils::InductionVariableInfos ivInfos;
auto loop = mlir::cast<fir::DoConcurrentLoopOp>(
doLoop.getRegion().back().getTerminator());
@@ -196,20 +224,72 @@ class DoConcurrentConversion
for (mlir::Value indVar : *indVars)
ivInfos.emplace_back(loop, indVar);
+ llvm::SmallVector<mlir::Value> loopNestLiveIns;
+ looputils::collectLoopLiveIns(loop, loopNestLiveIns);
+ assert(!loopNestLiveIns.empty());
+
llvm::SetVector<mlir::Value> locals;
looputils::collectLoopLocalValues(loop, locals);
+ // We do not want to map "loop-local" values to the device through
+ // `omp.map.info` ops. Therefore, we remove them from the list of live-ins.
+ loopNestLiveIns.erase(llvm::remove_if(loopNestLiveIns,
+ [&](mlir::Value liveIn) {
+ return locals.contains(liveIn);
+ }),
+ loopNestLiveIns.end());
+
+ mlir::omp::TargetOp targetOp;
+ mlir::omp::LoopNestOperands loopNestClauseOps;
+
mlir::IRMapping mapper;
+
+ if (mapToDevice) {
+ mlir::ModuleOp module = doLoop->getParentOfType<mlir::ModuleOp>();
+ bool isTargetDevice =
+ llvm::cast<mlir::omp::OffloadModuleInterface>(*module)
+ .getIsTargetDevice();
+
+ mlir::omp::TargetOperands targetClauseOps;
+ genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper,
+ loopNestClauseOps,
+ isTargetDevice ? nullptr : &targetClauseOps);
+
+ LiveInShapeInfoMap liveInShapeInfoMap;
+ fir::FirOpBuilder builder(
+ rewriter,
+ fir::getKindMapping(doLoop->getParentOfType<mlir::ModuleOp>()));
+
+ for (mlir::Value liveIn : loopNestLiveIns) {
+ targetClauseOps.mapVars.push_back(
+ genMapInfoOpForLiveIn(builder, liveIn));
+ liveInShapeInfoMap.insert(
+ {liveIn, TargetDeclareShapeCreationInfo(liveIn)});
+ }
+
+ targetOp =
+ genTargetOp(doLoop.getLoc(), rewriter, mapper, loopNestLiveIns,
+ targetClauseOps, loopNestClauseOps, liveInShapeInfoMap);
+ genTeamsOp(doLoop.getLoc(), rewriter);
+ }
+
mlir::omp::ParallelOp parallelOp =
genParallelOp(doLoop.getLoc(), rewriter, ivInfos, mapper);
- mlir::omp::LoopNestOperands loopNestClauseOps;
- genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper,
- loopNestClauseOps);
+
+ // Only set as composite when part of `distribute parallel do`.
+ parallelOp.setComposite(mapToDevice);
+
+ if (!mapToDevice)
+ genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper,
+ loopNestClauseOps);
for (mlir::Value local : locals)
looputils::localizeLoopLocalValue(local, parallelOp.getRegion(),
rewriter);
+ if (mapToDevice)
+ genDistributeOp(doLoop.getLoc(), rewriter).setComposite(/*val=*/true);
+
mlir::omp::LoopNestOp ompLoopNest =
genWsLoopOp(rewriter, loop, mapper, loopNestClauseOps,
/*isComposite=*/mapToDevice);
@@ -244,6 +324,51 @@ class DoConcurrentConversion
}
private:
+ struct TargetDeclareShapeCreationInfo {
+    // Note: We use `std::vector` (rather than `llvm::SmallVector` as usual) to
+    // interface more easily with `ShapeShiftOp::getOrigins()`, which returns
+    // `std::vector`.
+ std::vector<mlir::Value> startIndices{};
+ std::vector<mlir::Value> extents{};
+
+ TargetDeclareShapeCreationInfo(mlir::Value liveIn) {
+ mlir::Value shape = nullptr;
+ mlir::Operation *liveInDefiningOp = liveIn.getDefiningOp();
+ auto declareOp =
+ mlir::dyn_cast_if_present<hlfir::DeclareOp>(liveInDefiningOp);
+
+ if (declareOp != nullptr)
+ shape = declareOp.getShape();
+
+ if (shape == nullptr)
+ return;
+
+ auto shapeOp =
+ mlir::dyn_cast_if_present<fir::ShapeOp>(shape.getDefiningOp());
+ auto shapeShiftOp =
+ mlir::dyn_cast_if_present<fir::ShapeShiftOp>(shape.getDefiningOp());
+
+ if (shapeOp == nullptr && shapeShiftOp == nullptr)
+ TODO(liveIn.getLoc(),
+ "Shapes not defined by `fir.shape` or `fir.shape_shift` op's are"
+ "not supported yet.");
+
+ if (shapeShiftOp != nullptr)
+ startIndices = shapeShiftOp.getOrigins();
+
+ extents = shapeOp != nullptr
+ ? std::vector<mlir::Value>(shapeOp.getExtents().begin(),
+ shapeOp.getExtents().end())
+ : shapeShiftOp.getExtents();
+ }
+
+ bool isShapedValue() const { return !extents.empty(); }
+ bool isShapeShiftedValue() const { return !startIndices.empty(); }
+ };
+
+ using LiveInShapeInfoMap =
+ llvm::DenseMap<mlir::Value, TargetDeclareShapeCreationInfo>;
+
mlir::omp::ParallelOp
genParallelOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
looputils::InductionVariableInfos &ivInfos,
@@ -284,11 +409,11 @@ class DoConcurrentConversion
return result;
}
- void
- genLoopNestClauseOps(mlir::Location loc,
- mlir::ConversionPatternRewriter &rewriter,
- fir::DoConcurrentLoopOp loop, mlir::IRMapping &mapper,
- mlir::omp::LoopNestOperands &loopNestClauseOps) const {
+ void genLoopNestClauseOps(
+ mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
+ fir::DoConcurrentLoopOp loop, mlir::IRMapping &mapper,
+ mlir::omp::LoopNestOperands &loopNestClauseOps,
+ mlir::omp::TargetOperands *targetClauseOps = nullptr) const {
assert(loopNestClauseOps.loopLowerBounds.empty() &&
"Loop nest bounds were already emitted!");
@@ -297,11 +422,19 @@ class DoConcurrentConversion
bounds.push_back(var.getDefiningOp()->getResult(0));
};
+ auto hostEvalCapture = [&](mlir::Value var,
+ llvm::SmallVectorImpl<mlir::Value> &bounds) {
+ populateBounds(var, bounds);
+
+ if (targetClauseOps)
+ targetClauseOps->hostEvalVars.push_back(var);
+ };
+
for (auto [lb, ub, st] : llvm::zip_equal(
loop.getLowerBound(), loop.getUpperBound(), loop.getStep())) {
- populateBounds(lb, loopNestClauseOps.loopLowerBounds);
- populateBounds(ub, loopNestClauseOps.loopUpperBounds);
- populateBounds(st, loopNestClauseOps.loopSteps);
+ hostEvalCapture(lb, loopNestClauseOps.loopLowerBounds);
+ hostEvalCapture(ub, loopNestClauseOps.loopUpperBounds);
+ hostEvalCapture(st, loopNestClauseOps.loopSteps);
}
loopNestClauseOps.loopInclusive = rewriter.getUnitAttr();
@@ -439,6 +572,243 @@ class DoConcurrentConversion
return loopNestOp;
}
+ void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value liveIn,
+ mlir::Value rawAddr,
+ llvm::SmallVectorImpl<mlir::Value> &boundsOps) const {
+ fir::ExtendedValue extVal =
+ hlfir::translateToExtendedValue(rawAddr.getLoc(), builder,
+ hlfir::Entity{liveIn},
+ /*contiguousHint=*/
+ true)
+ .first;
+ fir::factory::AddrAndBoundsInfo info = fir::factory::getDataOperandBaseAddr(
+ builder, rawAddr, /*isOptional=*/false, rawAddr.getLoc());
+ boundsOps = fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
+ mlir::omp::MapBoundsType>(
+ builder, info, extVal,
+ /*dataExvIsAssumedSize=*/false, rawAddr.getLoc());
+ }
+
+ mlir::omp::MapInfoOp genMapInfoOpForLiveIn(fir::FirOpBuilder &builder,
+ mlir::Value liveIn) const {
+ mlir::Value rawAddr = liveIn;
+ llvm::StringRef name;
+
+ mlir::Operation *liveInDefiningOp = liveIn.getDefiningOp();
+ auto declareOp =
+ mlir::dyn_cast_if_present<hlfir::DeclareOp>(liveInDefiningOp);
+
+ if (declareOp != nullptr) {
+ // Use the raw address to avoid unboxing `fir.box` values whenever
+ // possible. Put differently, if we have access to the direct value memory
+ // reference/address, we use it.
+ rawAddr = declareOp.getOriginalBase();
+ name = declareOp.getUniqName();
+ }
+
+ if (!llvm::isa<mlir::omp::PointerLikeType>(rawAddr.getType())) {
+ builder.setInsertionPointAfter(liveInDefiningOp);
+ auto copyVal = builder.createTemporary(liveIn.getLoc(), liveIn.getType());
+ builder.createStoreWithConvert(copyVal.getLoc(), liveIn, copyVal);
+ rawAddr = copyVal;
+ }
+
+ mlir::Type liveInType = liveIn.getType();
+ mlir::Type eleType = liveInType;
+ if (auto refType = mlir::dyn_cast<fir::ReferenceType>(liveInType))
+ eleType = refType.getElementType();
+
+ llvm::omp::OpenMPOffloadMappingFlags mapFlag =
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
+ mlir::omp::VariableCaptureKind captureKind =
+ mlir::omp::VariableCaptureKind::ByRef;
+
+ if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) {
+ captureKind = mlir::omp::VariableCaptureKind::ByCopy;
+ } else if (!fir::isa_builtin_cptr_type(eleType)) {
+ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
+ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
+ }
+
+ llvm::SmallVector<mlir::Value> boundsOps;
+ genBoundsOps(builder, liveIn, rawAddr, boundsOps);
+
+ return Fortran::utils::openmp::createMapInfoOp(
+ builder, liveIn.getLoc(), rawAddr,
+ /*varPtrPtr=*/{}, name.str(), boundsOps,
+ /*members=*/{},
+ /*membersIndex=*/mlir::ArrayAttr{},
+ static_cast<
+ std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
+ mapFlag),
+ captureKind, rawAddr.getType());
+ }
+
+ mlir::omp::TargetOp
+ genTargetOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
+ mlir::IRMapping &mapper, llvm::ArrayRef<mlir::Value> mappedVars,
+ mlir::omp::TargetOperands &clauseOps,
+ mlir::omp::LoopNestOperands &loopNestClauseOps,
+ const LiveInShapeInfoMap &liveInShapeInfoMap) const {
+ auto targetOp = rewriter.create<mlir::omp::TargetOp>(loc, clauseOps);
+ auto argIface = llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*targetOp);
+
+ mlir::Region ®ion = targetOp.getRegion();
+
+ llvm::SmallVector<mlir::Type> regionArgTypes;
+ llvm::SmallVector<mlir::Location> regionArgLocs;
+
+ for (auto var : llvm::concat<const mlir::Value>(clauseOps.hostEvalVars,
+ clauseOps.mapVars)) {
+ regionArgTypes.push_back(var.getType());
+ regionArgLocs.push_back(var.getLoc());
+ }
+
+ rewriter.createBlock(®ion, {}, regionArgTypes, regionArgLocs);
+ fir::FirOpBuilder builder(
+ rewriter,
+ fir::getKindMapping(targetOp->getParentOfType<mlir::ModuleOp>()));
+
+    // Within the loop, it is possible that we discover other values that need
+    // to be mapped to the target region (the shape info values for arrays, for
+    // example). Therefore, the map block args might be extended and resized.
+ // Hence, we invoke `argIface.getMapBlockArgs()` every iteration to make
+ // sure we access the proper vector of data.
+ int idx = 0;
+ for (auto [mapInfoOp, mappedVar] :
+ llvm::zip_equal(clauseOps.mapVars, mappedVars)) {
+ auto miOp = mlir::cast<mlir::omp::MapInfoOp>(mapInfoOp.getDefiningOp());
+ hlfir::DeclareOp liveInDeclare =
+ genLiveInDeclare(builder, targetOp, argIface.getMapBlockArgs()[idx],
+ miOp, liveInShapeInfoMap.at(mappedVar));
+ ++idx;
+
+      // TODO If `mappedVar.getDefiningOp()` is a `fir::BoxAddrOp`, we probably
+      // need to "unpack" the box by getting the defining op of its value.
+ // However, we did not hit this case in reality yet so leaving it as a
+ // todo for now.
+
+ auto mapHostValueToDevice = [&](mlir::Value hostValue,
+ mlir::Value deviceValue) {
+ if (!llvm::isa<mlir::omp::PointerLikeType>(hostValue.getType()))
+ mapper.map(hostValue,
+ builder.loadIfRef(hostValue.getLoc(), deviceValue));
+ else
+ mapper.map(hostValue, deviceValue);
+ };
+
+ mapHostValueToDevice(mappedVar, liveInDeclare.getOriginalBase());
+
+ if (auto origDeclareOp = mlir::dyn_cast_if_present<hlfir::DeclareOp>(
+ mappedVar.getDefiningOp()))
+ mapHostValueToDevice(origDeclareOp.getBase(), liveInDeclare.getBase());
+ }
+
+ for (auto [arg, hostEval] : llvm::zip_equal(argIface.getHostEvalBlockArgs(),
+ clauseOps.hostEvalVars))
+ mapper.map(hostEval, arg);
+
+ for (unsigned i = 0; i < loopNestClauseOps.loopLowerBounds.size(); ++i) {
+ loopNestClauseOps.loopLowerBounds[i] =
+ mapper.lookup(loopNestClauseOps.loopLowerBounds[i]);
+ loopNestClauseOps.loopUpperBounds[i] =
+ mapper.lookup(loopNestClauseOps.loopUpperBounds[i]);
+ loopNestClauseOps.loopSteps[i] =
+ mapper.lookup(loopNestClauseOps.loopSteps[i]);
+ }
+
+ // Check if cloning the bounds introduced any dependency on the outer
+ // region. If so, then either clone them as well if they are
+ // MemoryEffectFree, or else copy them to a new temporary and add them to
+ // the map and block_argument lists and replace their uses with the new
+ // temporary.
+ Fortran::utils::openmp::cloneOrMapRegionOutsiders(builder, targetOp);
+ rewriter.setInsertionPoint(
+ rewriter.create<mlir::omp::TerminatorOp>(targetOp.getLoc()));
+
+ return targetOp;
+ }
+
+ hlfir::DeclareOp genLiveInDeclare(
+ fir::FirOpBuilder &builder, mlir::omp::TargetOp targetOp,
+ mlir::Value liveInArg, mlir::omp::MapInfoOp liveInMapInfoOp,
+ const TargetDeclareShapeCreationInfo &targetShapeCreationInfo) const {
+ mlir::Type liveInType = liveInArg.getType();
+ std::string liveInName = liveInMapInfoOp.getName().has_value()
+ ? liveInMapInfoOp.getName().value().str()
+ : std::string("");
+ if (fir::isa_ref_type(liveInType))
+ liveInType = fir::unwrapRefType(liveInType);
+
+ mlir::Value shape = [&]() -> mlir::Value {
+ if (!targetShapeCreationInfo.isShapedValue())
+ return {};
+
+ llvm::SmallVector<mlir::Value> extentOperands;
+ llvm::SmallVector<mlir::Value> startIndexOperands;
+
+ if (targetShapeCreationInfo.isShapeShiftedValue()) {
+ llvm::SmallVector<mlir::Value> shapeShiftOperands;
+
+ size_t shapeIdx = 0;
+ for (auto [startIndex, extent] :
+ llvm::zip_equal(targetShapeCreationInfo.startIndices,
+ targetShapeCreationInfo.extents)) {
+ shapeShiftOperands.push_back(
+ Fortran::utils::openmp::mapTemporaryValue(
+ builder, targetOp, startIndex,
+ liveInName + ".start_idx.dim" + std::to_string(shapeIdx)));
+ shapeShiftOperands.push_back(
+ Fortran::utils::openmp::mapTemporaryValue(
+ builder, targetOp, extent,
+ liveInName + ".extent.dim" + std::to_string(shapeIdx)));
+ ++shapeIdx;
+ }
+
+ auto shapeShiftType = fir::ShapeShiftType::get(
+ builder.getContext(), shapeShiftOperands.size() / 2);
+ return builder.create<fir::ShapeShiftOp>(
+ liveInArg.getLoc(), shapeShiftType, shapeShiftOperands);
+ }
+
+ llvm::SmallVector<mlir::Value> shapeOperands;
+ size_t shapeIdx = 0;
+ for (auto extent : targetShapeCreationInfo.extents) {
+ shapeOperands.push_back(Fortran::utils::openmp::mapTemporaryValue(
+ builder, targetOp, extent,
+ liveInName + ".extent.dim" + std::to_string(shapeIdx)));
+ ++shapeIdx;
+ }
+
+ return builder.create<fir::ShapeOp>(liveInArg.getLoc(), shapeOperands);
+ }();
+
+ return builder.create<hlfir::DeclareOp>(liveInArg.getLoc(), liveInArg,
+ liveInName, shape);
+ }
+
+ mlir::omp::TeamsOp
+ genTeamsOp(mlir::Location loc,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ auto teamsOp = rewriter.create<mlir::omp::TeamsOp>(
+ loc, /*clauses=*/mlir::omp::TeamsOperands{});
+
+ rewriter.createBlock(&teamsOp.getRegion());
+ rewriter.setInsertionPoint(rewriter.create<mlir::omp::TerminatorOp>(loc));
+
+ return teamsOp;
+ }
+
+ mlir::omp::DistributeOp
+ genDistributeOp(mlir::Location loc,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ auto distOp = rewriter.create<mlir::omp::DistributeOp>(
+ loc, /*clauses=*/mlir::omp::DistributeOperands{});
+
+ rewriter.createBlock(&distOp.getRegion());
+ return distOp;
+ }
+
bool mapToDevice;
llvm::DenseSet<fir::DoConcurrentOp> &concurrentLoopsToSkip;
mlir::SymbolTable &moduleSymbolTable;
diff --git a/flang/test/Transforms/DoConcurrent/basic_device.f90 b/flang/test/Transforms/DoConcurrent/basic_device.f90
new file mode 100644
index 0000000000000..7bce696387646
--- /dev/null
+++ b/flang/test/Transforms/DoConcurrent/basic_device.f90
@@ -0,0 +1,83 @@
+! Tests mapping of a basic `do concurrent` loop to
+! `!$omp target teams distribute parallel do`.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \
+! RUN: | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \
+! RUN: | FileCheck %s
+
+program do_concurrent_basic
+ implicit none
+ integer :: a(10)
+ integer :: i
+
+ ! CHECK-DAG: %[[I_ORIG_ALLOC:.*]] = fir.alloca i32 {bindc_name = "i"}
+ ! CHECK: %[[I_ORIG_DECL:.*]]:2 = hlfir.declare %[[I_ORIG_ALLOC]]
+
+ ! CHECK-DAG: %[[A_ADDR:.*]] = fir.address_of(@_QFEa)
+ ! CHECK: %[[A_SHAPE:.*]] = fir.shape %[[A_EXTENT:.*]] : (index) -> !fir.shape<1>
+ ! CHECK: %[[A_ORIG_DECL:.*]]:2 = hlfir.declare %[[A_ADDR]](%[[A_SHAPE]])
+
+ ! CHECK-NOT: fir.do_loop
+
+ ! CHECK: %[[C1:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[HOST_LB:.*]] = fir.convert %[[C1]] : (i32) -> index
+ ! CHECK: %[[C10:.*]] = arith.constant 10 : i32
+ ! CHECK: %[[HOST_UB:.*]] = fir.convert %[[C10]] : (i32) -> index
+ ! CHECK: %[[HOST_STEP:.*]] = arith.constant 1 : index
+
+ ! CHECK-DAG: %[[I_MAP_INFO:.*]] = omp.map.info var_ptr(%[[I_ORIG_DECL]]#1
+ ! CHECK: %[[C0:.*]] = arith.constant 0 : index
+ ! CHECK: %[[UPPER_BOUND:.*]] = arith.subi %[[A_EXTENT]], %{{c1.*}} : index
+
+ ! CHECK: %[[A_BOUNDS:.*]] = omp.map.bounds lower_bound(%[[C0]] : index)
+ ! CHECK-SAME: upper_bound(%[[UPPER_BOUND]] : index)
+ ! CHECK-SAME: extent(%[[A_EXTENT]] : index)
+
+ ! CHECK-DAG: %[[A_MAP_INFO:.*]] = omp.map.info var_ptr(%[[A_ORIG_DECL]]#1 : {{[^(]+}})
+ ! CHECK-SAME: map_clauses(implicit, tofrom) capture(ByRef) bounds(%[[A_BOUNDS]])
+
+ ! CHECK: omp.target
+ ! CHECK-SAME: host_eval(%[[HOST_LB]] -> %[[LB:[[:alnum:]]+]], %[[HOST_UB]] -> %[[UB:[[:alnum:]]+]], %[[HOST_STEP]] -> %[[STEP:[[:alnum:]]+]] : index, index, index)
+ ! CHECK-SAME: map_entries(
+ ! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
+ ! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
+ ! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
+ ! CHECK-SAME: %[[I_MAP_INFO]] -> %[[I_ARG:[[:alnum:]]+]],
+ ! CHECK-SAME: %[[A_MAP_INFO]] -> %[[A_ARG:.[[:alnum:]]+]]
+
+ ! CHECK: %[[A_DEV_DECL:.*]]:2 = hlfir.declare %[[A_ARG]]
+ ! CHECK: omp.teams {
+ ! CHECK-NEXT: omp.parallel {
+
+ ! CHECK-NEXT: %[[ITER_VAR:.*]] = fir.alloca i32 {bindc_name = "i"}
+ ! CHECK-NEXT: %[[BINDING:.*]]:2 = hlfir.declare %[[ITER_VAR]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+ ! CHECK-NEXT: omp.distribute {
+ ! CHECK-NEXT: omp.wsloop {
+
+ ! CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
+ ! CHECK-NEXT: %[[IV_IDX:.*]] = fir.convert %[[ARG0]] : (index) -> i32
+ ! CHECK-NEXT: fir.store %[[IV_IDX]] to %[[BINDING]]#0 : !fir.ref<i32>
+ ! CHECK-NEXT: %[[IV_VAL1:.*]] = fir.load %[[BINDING]]#0 : !fir.ref<i32>
+ ! CHECK-NEXT: %[[IV_VAL2:.*]] = fir.load %[[BINDING]]#0 : !fir.ref<i32>
+ ! CHECK-NEXT: %[[IV_VAL_I64:.*]] = fir.convert %[[IV_VAL2]] : (i32) -> i64
+ ! CHECK-NEXT: %[[ARR_ACCESS:.*]] = hlfir.designate %[[A_DEV_DECL]]#0 (%[[IV_VAL_I64]]) : (!fir.ref<!fir.array<10xi32>>, i64) -> !fir.ref<i32>
+ ! CHECK-NEXT: hlfir.assign %[[IV_VAL1]] to %[[ARR_ACCESS]] : i32, !fir.ref<i32>
+ ! CHECK-NEXT: omp.yield
+ ! CHECK-NEXT: }
+
+ ! CHECK-NEXT: } {omp.composite}
+ ! CHECK-NEXT: } {omp.composite}
+ ! CHECK-NEXT: omp.terminator
+ ! CHECK-NEXT: } {omp.composite}
+ ! CHECK-NEXT: omp.terminator
+ ! CHECK-NEXT: }
+ ! CHECK-NEXT: omp.terminator
+ ! CHECK-NEXT: }
+ do concurrent (i=1:10)
+ a(i) = i
+ end do
+
+ ! CHECK-NOT: fir.do_loop
+end program do_concurrent_basic
diff --git a/flang/test/Transforms/DoConcurrent/basic_device.mlir b/flang/test/Transforms/DoConcurrent/basic_device.mlir
index 0ca48943864c8..fa511c3d46d58 100644
--- a/flang/test/Transforms/DoConcurrent/basic_device.mlir
+++ b/flang/test/Transforms/DoConcurrent/basic_device.mlir
@@ -1,4 +1,4 @@
-// RUN: fir-opt --omp-do-concurrent-conversion="map-to=device" -verify-diagnostics %s
+// RUN: fir-opt --omp-do-concurrent-conversion="map-to=device" %s -o - | FileCheck %s
func.func @do_concurrent_basic() attributes {fir.bindc_name = "do_concurrent_basic"} {
%2 = fir.address_of(@_QFEa) : !fir.ref<!fir.array<10xi32>>
@@ -11,8 +11,12 @@ func.func @do_concurrent_basic() attributes {fir.bindc_name = "do_concurrent_bas
%8 = fir.convert %c10_i32 : (i32) -> index
%c1 = arith.constant 1 : index
- // expected-error at +2 {{not yet implemented: Mapping `do concurrent` loops to device}}
- // expected-error at below {{failed to legalize operation 'fir.do_concurrent'}}
+ // CHECK: omp.target
+ // CHECK: omp.teams
+ // CHECK: omp.parallel
+ // CHECK: omp.distribute
+ // CHECK: omp.wsloop
+ // CHECK: omp.loop_nest
fir.do_concurrent {
%0 = fir.alloca i32 {bindc_name = "i"}
%1:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
More information about the llvm-branch-commits
mailing list