[flang-commits] [flang] [llvm] [mlir] [flang][OpenMP] Enable tiling (PR #143715)
Jan Leyonberg via flang-commits
flang-commits at lists.llvm.org
Tue Aug 19 07:56:25 PDT 2025
https://github.com/jsjodin updated https://github.com/llvm/llvm-project/pull/143715
>From ae8110483973b449c4704887055e34a4d73a2e2b Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Fri, 14 Mar 2025 11:06:51 -0400
Subject: [PATCH 01/26] Initial implementation of tiling.
---
flang/include/flang/Lower/OpenMP.h | 1 -
flang/lib/Lower/OpenMP/OpenMP.cpp | 70 +++++--
flang/lib/Lower/OpenMP/Utils.cpp | 33 +++-
flang/lib/Semantics/canonicalize-omp.cpp | 44 ++++-
flang/lib/Semantics/resolve-directives.cpp | 176 ++++++++++++++----
.../Frontend/OpenMP/ConstructDecompositionT.h | 24 +++
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 9 +
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 24 +++
llvm/lib/Transforms/Utils/CodeExtractor.cpp | 7 +-
.../mlir/Dialect/OpenMP/OpenMPClauses.td | 32 ++++
mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 4 +-
.../Conversion/SCFToOpenMP/SCFToOpenMP.cpp | 3 +-
mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 52 +++++-
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 65 +++++--
14 files changed, 458 insertions(+), 86 deletions(-)
diff --git a/flang/include/flang/Lower/OpenMP.h b/flang/include/flang/Lower/OpenMP.h
index 581c93f76d627..df01a7b82c66c 100644
--- a/flang/include/flang/Lower/OpenMP.h
+++ b/flang/include/flang/Lower/OpenMP.h
@@ -80,7 +80,6 @@ void genOpenMPDeclarativeConstruct(AbstractConverter &,
void genOpenMPSymbolProperties(AbstractConverter &converter,
const pft::Variable &var);
-int64_t getCollapseValue(const Fortran::parser::OmpClauseList &clauseList);
void genThreadprivateOp(AbstractConverter &, const pft::Variable &);
void genDeclareTargetIntGlobal(AbstractConverter &, const pft::Variable &);
bool isOpenMPTargetConstruct(const parser::OpenMPConstruct &);
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index fef64ccc15015..f7a3b08b7f54b 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -404,6 +404,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
return;
const parser::OmpClauseList *beginClauseList = nullptr;
+ const parser::OmpClauseList *middleClauseList = nullptr;
const parser::OmpClauseList *endClauseList = nullptr;
common::visit(
common::visitors{
@@ -418,6 +419,22 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
beginClauseList =
&std::get<parser::OmpClauseList>(beginDirective.t);
+ // FIXME(JAN): For now we check if there is an inner
+ // OpenMPLoopConstruct, and extract the size clause from there
+ const auto &innerOptional = std::get<std::optional<
+ common::Indirection<parser::OpenMPLoopConstruct>>>(
+ ompConstruct.t);
+ if (innerOptional.has_value()) {
+ const auto &innerLoopDirective = innerOptional.value().value();
+ const auto &innerBegin =
+ std::get<parser::OmpBeginLoopDirective>(innerLoopDirective.t);
+ const auto &innerDirective =
+ std::get<parser::OmpLoopDirective>(innerBegin.t);
+ if (innerDirective.v == llvm::omp::Directive::OMPD_tile) {
+ middleClauseList =
+ &std::get<parser::OmpClauseList>(innerBegin.t);
+ }
+ }
if (auto &endDirective =
std::get<std::optional<parser::OmpEndLoopDirective>>(
ompConstruct.t)) {
@@ -431,6 +448,9 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
assert(beginClauseList && "expected begin directive");
clauses.append(makeClauses(*beginClauseList, semaCtx));
+ if (middleClauseList)
+ clauses.append(makeClauses(*middleClauseList, semaCtx));
+
if (endClauseList)
clauses.append(makeClauses(*endClauseList, semaCtx));
};
@@ -910,6 +930,7 @@ static void genLoopVars(
storeOp =
createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol);
}
+
firOpBuilder.setInsertionPointAfter(storeOp);
}
@@ -1660,6 +1681,23 @@ genLoopNestClauses(lower::AbstractConverter &converter,
cp.processCollapse(loc, eval, clauseOps, iv);
clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr();
+
+ fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+ for (auto &clause : clauses) {
+ if (clause.id == llvm::omp::Clause::OMPC_collapse) {
+ const auto &collapse = std::get<clause::Collapse>(clause.u);
+ int64_t collapseValue = evaluate::ToInt64(collapse.v).value();
+ clauseOps.numCollapse = firOpBuilder.getI64IntegerAttr(collapseValue);
+ } else if (clause.id == llvm::omp::Clause::OMPC_sizes) {
+ const auto &sizes = std::get<clause::Sizes>(clause.u);
+ llvm::SmallVector<int64_t> sizeValues;
+ for (auto &size : sizes.v) {
+ int64_t sizeValue = evaluate::ToInt64(size).value();
+ sizeValues.push_back(sizeValue);
+ }
+ clauseOps.tileSizes = sizeValues;
+ }
+ }
}
static void genLoopClauses(
@@ -2036,9 +2074,9 @@ static mlir::omp::LoopNestOp genLoopNestOp(
return llvm::SmallVector<const semantics::Symbol *>(iv);
};
- auto *nestedEval =
- getCollapsedLoopEval(eval, getCollapseValue(item->clauses));
-
+ uint64_t nestValue = getCollapseValue(item->clauses);
+ nestValue = nestValue < iv.size() ? iv.size() : nestValue;
+ auto *nestedEval = getCollapsedLoopEval(eval, nestValue);
return genOpWithBody<mlir::omp::LoopNestOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval,
directive)
@@ -3890,6 +3928,20 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
std::get<parser::OmpBeginLoopDirective>(loopConstruct.t);
List<Clause> clauses = makeClauses(
std::get<parser::OmpClauseList>(beginLoopDirective.t), semaCtx);
+
+ const auto &innerOptional = std::get<std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(loopConstruct.t);
+ if (innerOptional.has_value()) {
+ const auto &innerLoopDirective = innerOptional.value().value();
+ const auto &innerBegin =
+ std::get<parser::OmpBeginLoopDirective>(innerLoopDirective.t);
+ const auto &innerDirective =
+ std::get<parser::OmpLoopDirective>(innerBegin.t);
+ if (innerDirective.v == llvm::omp::Directive::OMPD_tile) {
+ clauses.append(
+ makeClauses(std::get<parser::OmpClauseList>(innerBegin.t), semaCtx));
+ }
+ }
+
if (auto &endLoopDirective =
std::get<std::optional<parser::OmpEndLoopDirective>>(
loopConstruct.t)) {
@@ -4021,18 +4073,6 @@ void Fortran::lower::genOpenMPSymbolProperties(
lower::genDeclareTargetIntGlobal(converter, var);
}
-int64_t
-Fortran::lower::getCollapseValue(const parser::OmpClauseList &clauseList) {
- for (const parser::OmpClause &clause : clauseList.v) {
- if (const auto &collapseClause =
- std::get_if<parser::OmpClause::Collapse>(&clause.u)) {
- const auto *expr = semantics::GetExpr(collapseClause->v);
- return evaluate::ToInt64(*expr).value();
- }
- }
- return 1;
-}
-
void Fortran::lower::genThreadprivateOp(lower::AbstractConverter &converter,
const lower::pft::Variable &var) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 77b1e39083aa6..11721d05001b0 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -38,14 +38,22 @@ namespace lower {
namespace omp {
int64_t getCollapseValue(const List<Clause> &clauses) {
- auto iter = llvm::find_if(clauses, [](const Clause &clause) {
- return clause.id == llvm::omp::Clause::OMPC_collapse;
- });
- if (iter != clauses.end()) {
- const auto &collapse = std::get<clause::Collapse>(iter->u);
- return evaluate::ToInt64(collapse.v).value();
+ int64_t collapseValue = 1;
+ int64_t numTileSizes = 0;
+ for (auto &clause : clauses) {
+ if (clause.id == llvm::omp::Clause::OMPC_collapse) {
+ const auto &collapse = std::get<clause::Collapse>(clause.u);
+ collapseValue = evaluate::ToInt64(collapse.v).value();
+ } else if (clause.id == llvm::omp::Clause::OMPC_sizes) {
+ const auto &sizes = std::get<clause::Sizes>(clause.u);
+ numTileSizes = sizes.v.size();
+ }
}
- return 1;
+
+ collapseValue = collapseValue - numTileSizes;
+ int64_t result =
+ collapseValue > numTileSizes ? collapseValue : numTileSizes;
+ return result;
}
void genObjectList(const ObjectList &objects,
@@ -613,6 +621,7 @@ bool collectLoopRelatedInfo(
lower::pft::Evaluation &eval, const omp::List<omp::Clause> &clauses,
mlir::omp::LoopRelatedClauseOps &result,
llvm::SmallVectorImpl<const semantics::Symbol *> &iv) {
+
bool found = false;
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
@@ -628,7 +637,16 @@ bool collectLoopRelatedInfo(
collapseValue = evaluate::ToInt64(clause->v).value();
found = true;
}
+ std::int64_t sizesLengthValue = 0l;
+ if (auto *clause =
+ ClauseFinder::findUniqueClause<omp::clause::Sizes>(clauses)) {
+ sizesLengthValue = clause->v.size();
+ found = true;
+ }
+ collapseValue = collapseValue - sizesLengthValue;
+ collapseValue =
+ collapseValue < sizesLengthValue ? sizesLengthValue : collapseValue;
std::size_t loopVarTypeSize = 0;
do {
lower::pft::Evaluation *doLoop =
@@ -661,7 +679,6 @@ bool collectLoopRelatedInfo(
} while (collapseValue > 0);
convertLoopBounds(converter, currentLocation, result, loopVarTypeSize);
-
return found;
}
diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp
index 9722eca19447d..fb0bb0f923574 100644
--- a/flang/lib/Semantics/canonicalize-omp.cpp
+++ b/flang/lib/Semantics/canonicalize-omp.cpp
@@ -11,6 +11,7 @@
#include "flang/Parser/parse-tree.h"
#include "flang/Semantics/semantics.h"
+# include <stack>
// After Loop Canonicalization, rewrite OpenMP parse tree to make OpenMP
// Constructs more structured which provide explicit scopes for later
// structural checks and semantic analysis.
@@ -117,15 +118,17 @@ class CanonicalizationOfOmp {
// in the same iteration
//
// Original:
- // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
- // OmpBeginLoopDirective
+ // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t->
+ // OmpBeginLoopDirective t-> OmpLoopDirective
+ // [ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct u->
+ /// OmpBeginLoopDirective t-> OmpLoopDirective t-> Tile v-> OMP_tile]
// ExecutableConstruct -> DoConstruct
+ // [ExecutableConstruct -> OmpEndLoopDirective]
// ExecutableConstruct -> OmpEndLoopDirective (if available)
//
// After rewriting:
- // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
- // OmpBeginLoopDirective
- // DoConstruct
+ // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t->
+ // OmpBeginLoopDirective t -> OmpLoopDirective -> DoConstruct
// OmpEndLoopDirective (if available)
parser::Block::iterator nextIt;
auto &beginDir{std::get<parser::OmpBeginLoopDirective>(x.t)};
@@ -147,20 +150,41 @@ class CanonicalizationOfOmp {
if (GetConstructIf<parser::CompilerDirective>(*nextIt))
continue;
+ // Keep track of the loops to handle the end loop directives
+ std::stack<parser::OpenMPLoopConstruct *> loops;
+ loops.push(&x);
+ while (auto *innerConstruct{
+ GetConstructIf<parser::OpenMPConstruct>(*nextIt)}) {
+ if (auto *innerOmpLoop{
+ std::get_if<parser::OpenMPLoopConstruct>(&innerConstruct->u)}) {
+ std::get<
+ std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
+ loops.top()->t) = std::move(*innerOmpLoop);
+ // Retrieveing the address so that DoConstruct or inner loop can be
+ // set later.
+ loops.push(&(std::get<std::optional<
+ common::Indirection<parser::OpenMPLoopConstruct>>>(
+ loops.top()->t)
+ .value()
+ .value()));
+ nextIt = block.erase(nextIt);
+ }
+ }
if (auto *doCons{GetConstructIf<parser::DoConstruct>(*nextIt)}) {
if (doCons->GetLoopControl()) {
// move DoConstruct
std::get<std::optional<std::variant<parser::DoConstruct,
- common::Indirection<parser::OpenMPLoopConstruct>>>>(x.t) =
- std::move(*doCons);
+ common::Indirection<parser::OpenMPLoopConstruct>>>>(
+ loops.top()->t) = std::move(*doCons);
nextIt = block.erase(nextIt);
// try to match OmpEndLoopDirective
- if (nextIt != block.end()) {
+ while (nextIt != block.end() && !loops.empty()) {
if (auto *endDir{
GetConstructIf<parser::OmpEndLoopDirective>(*nextIt)}) {
- std::get<std::optional<parser::OmpEndLoopDirective>>(x.t) =
- std::move(*endDir);
+ std::get<std::optional<parser::OmpEndLoopDirective>>(
+ loops.top()->t) = std::move(*endDir);
nextIt = block.erase(nextIt);
+ loops.pop();
}
}
} else {
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index fe0d2a73805de..3ad0f73607b44 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -816,7 +816,28 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor<llvm::omp::Directive> {
const parser::OmpClause *GetAssociatedClause() { return associatedClause; }
private:
+ std::int64_t SetAssociatedMaxClause(llvm::SmallVector<std::int64_t> &,
+ llvm::SmallVector<const parser::OmpClause *> &);
+ std::int64_t GetAssociatedLoopLevelFromLoopConstruct(
+ const parser::OpenMPLoopConstruct &);
std::int64_t GetAssociatedLoopLevelFromClauses(const parser::OmpClauseList &);
+ void CollectAssociatedLoopLevelsFromLoopConstruct(
+ const parser::OpenMPLoopConstruct &, llvm::SmallVector<std::int64_t> &,
+ llvm::SmallVector<const parser::OmpClause *> &);
+ void CollectAssociatedLoopLevelsFromInnerLoopContruct(
+ const parser::OpenMPLoopConstruct &, llvm::SmallVector<std::int64_t> &,
+ llvm::SmallVector<const parser::OmpClause *> &);
+ template <typename T>
+ void CollectAssociatedLoopLevelFromClauseValue(
+ const parser::OmpClause &clause, llvm::SmallVector<std::int64_t> &,
+ llvm::SmallVector<const parser::OmpClause *> &);
+ template <typename T>
+ void CollectAssociatedLoopLevelFromClauseSize(const parser::OmpClause &,
+ llvm::SmallVector<std::int64_t> &,
+ llvm::SmallVector<const parser::OmpClause *> &);
+ void CollectAssociatedLoopLevelsFromClauses(const parser::OmpClauseList &,
+ llvm::SmallVector<std::int64_t> &,
+ llvm::SmallVector<const parser::OmpClause *> &);
Symbol::Flags dataSharingAttributeFlags{Symbol::Flag::OmpShared,
Symbol::Flag::OmpPrivate, Symbol::Flag::OmpFirstPrivate,
@@ -1821,7 +1842,6 @@ bool OmpAttributeVisitor::Pre(
bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) {
const auto &beginLoopDir{std::get<parser::OmpBeginLoopDirective>(x.t)};
const auto &beginDir{std::get<parser::OmpLoopDirective>(beginLoopDir.t)};
- const auto &clauseList{std::get<parser::OmpClauseList>(beginLoopDir.t)};
switch (beginDir.v) {
case llvm::omp::Directive::OMPD_distribute:
case llvm::omp::Directive::OMPD_distribute_parallel_do:
@@ -1872,7 +1892,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) {
beginDir.v == llvm::omp::Directive::OMPD_target_loop)
IssueNonConformanceWarning(beginDir.v, beginDir.source, 52);
ClearDataSharingAttributeObjects();
- SetContextAssociatedLoopLevel(GetAssociatedLoopLevelFromClauses(clauseList));
+ SetContextAssociatedLoopLevel(GetAssociatedLoopLevelFromLoopConstruct(x));
if (beginDir.v == llvm::omp::Directive::OMPD_do) {
auto &optLoopCons = std::get<std::optional<parser::NestedConstruct>>(x.t);
@@ -1886,7 +1906,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) {
}
}
PrivatizeAssociatedLoopIndexAndCheckLoopLevel(x);
- ordCollapseLevel = GetAssociatedLoopLevelFromClauses(clauseList) + 1;
+ ordCollapseLevel = GetAssociatedLoopLevelFromLoopConstruct(x) + 1;
return true;
}
@@ -1974,44 +1994,124 @@ bool OmpAttributeVisitor::Pre(const parser::DoConstruct &x) {
return true;
}
+static bool isSizesClause(const parser::OmpClause *clause) {
+ return std::holds_alternative<parser::OmpClause::Sizes>(clause->u);
+}
+
+std::int64_t OmpAttributeVisitor::SetAssociatedMaxClause(
+ llvm::SmallVector<std::int64_t> &levels,
+ llvm::SmallVector<const parser::OmpClause *> &clauses) {
+
+ // Find the tile level to know how much to reduce the level for collapse
+ std::int64_t tileLevel = 0;
+ for (auto [level, clause] : llvm::zip_equal(levels, clauses)) {
+ if (isSizesClause(clause)) {
+ tileLevel = level;
+ }
+ }
+
+ std::int64_t maxLevel = 1;
+ const parser::OmpClause *maxClause = nullptr;
+ for (auto [level, clause] : llvm::zip_equal(levels, clauses)) {
+ if (tileLevel > 0 && tileLevel < level) {
+ context_.Say(clause->source,
+ "The value of the parameter in the COLLAPSE clause must"
+ " not be larger than the number of the number of tiled loops"
+ " because collapse relies on independent loop iterations."_err_en_US);
+ return 1;
+ }
+
+ if (!isSizesClause(clause)) {
+ level = level - tileLevel;
+ }
+
+ if (level > maxLevel) {
+ maxLevel = level;
+ maxClause = clause;
+ }
+ }
+ if (maxClause)
+ SetAssociatedClause(maxClause);
+ return maxLevel;
+}
+
+std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromLoopConstruct(
+ const parser::OpenMPLoopConstruct &x) {
+ llvm::SmallVector<std::int64_t> levels;
+ llvm::SmallVector<const parser::OmpClause *> clauses;
+
+ CollectAssociatedLoopLevelsFromLoopConstruct(x, levels, clauses);
+ return SetAssociatedMaxClause(levels, clauses);
+}
+
std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromClauses(
const parser::OmpClauseList &x) {
- std::int64_t orderedLevel{0};
- std::int64_t collapseLevel{0};
+ llvm::SmallVector<std::int64_t> levels;
+ llvm::SmallVector<const parser::OmpClause *> clauses;
- const parser::OmpClause *ordClause{nullptr};
- const parser::OmpClause *collClause{nullptr};
+ CollectAssociatedLoopLevelsFromClauses(x, levels, clauses);
+ return SetAssociatedMaxClause(levels, clauses);
+}
- for (const auto &clause : x.v) {
- if (const auto *orderedClause{
- std::get_if<parser::OmpClause::Ordered>(&clause.u)}) {
- if (const auto v{EvaluateInt64(context_, orderedClause->v)}) {
- orderedLevel = *v;
- }
- ordClause = &clause;
- }
- if (const auto *collapseClause{
- std::get_if<parser::OmpClause::Collapse>(&clause.u)}) {
- if (const auto v{EvaluateInt64(context_, collapseClause->v)}) {
- collapseLevel = *v;
- }
- collClause = &clause;
+void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromLoopConstruct(
+ const parser::OpenMPLoopConstruct &x,
+ llvm::SmallVector<std::int64_t> &levels,
+ llvm::SmallVector<const parser::OmpClause *> &clauses) {
+ const auto &beginLoopDir{std::get<parser::OmpBeginLoopDirective>(x.t)};
+ const auto &clauseList{std::get<parser::OmpClauseList>(beginLoopDir.t)};
+
+ CollectAssociatedLoopLevelsFromClauses(clauseList, levels, clauses);
+ CollectAssociatedLoopLevelsFromInnerLoopContruct(x, levels, clauses);
+}
+
+void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromInnerLoopContruct(
+ const parser::OpenMPLoopConstruct &x,
+ llvm::SmallVector<std::int64_t> &levels,
+ llvm::SmallVector<const parser::OmpClause *> &clauses) {
+ const auto &innerOptional =
+ std::get<std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
+ x.t);
+ if (innerOptional.has_value()) {
+ CollectAssociatedLoopLevelsFromLoopConstruct(
+ innerOptional.value().value(), levels, clauses);
+ }
+}
+
+template <typename T>
+void OmpAttributeVisitor::CollectAssociatedLoopLevelFromClauseValue(
+ const parser::OmpClause &clause, llvm::SmallVector<std::int64_t> &levels,
+ llvm::SmallVector<const parser::OmpClause *> &clauses) {
+ if (const auto tclause{std::get_if<T>(&clause.u)}) {
+ std::int64_t level = 0;
+ if (const auto v{EvaluateInt64(context_, tclause->v)}) {
+ level = *v;
}
+ levels.push_back(level);
+ clauses.push_back(&clause);
}
+}
- if (orderedLevel && (!collapseLevel || orderedLevel >= collapseLevel)) {
- SetAssociatedClause(ordClause);
- return orderedLevel;
- } else if (!orderedLevel && collapseLevel) {
- SetAssociatedClause(collClause);
- return collapseLevel;
- } else {
- SetAssociatedClause(nullptr);
+template <typename T>
+void OmpAttributeVisitor::CollectAssociatedLoopLevelFromClauseSize(
+ const parser::OmpClause &clause, llvm::SmallVector<std::int64_t> &levels,
+ llvm::SmallVector<const parser::OmpClause *> &clauses) {
+ if (const auto tclause{std::get_if<T>(&clause.u)}) {
+ levels.push_back(tclause->v.size());
+ clauses.push_back(&clause);
}
- // orderedLevel < collapseLevel is an error handled in structural
- // checks
+}
- return 1; // default is outermost loop
+void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromClauses(
+ const parser::OmpClauseList &x, llvm::SmallVector<std::int64_t> &levels,
+ llvm::SmallVector<const parser::OmpClause *> &clauses) {
+ for (const auto &clause : x.v) {
+ CollectAssociatedLoopLevelFromClauseValue<parser::OmpClause::Ordered>(
+ clause, levels, clauses);
+ CollectAssociatedLoopLevelFromClauseValue<parser::OmpClause::Collapse>(
+ clause, levels, clauses);
+ CollectAssociatedLoopLevelFromClauseSize<parser::OmpClause::Sizes>(
+ clause, levels, clauses);
+ }
}
// 2.15.1.1 Data-sharing Attribute Rules - Predetermined
@@ -2043,10 +2143,18 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel(
const parser::OmpClause *clause{GetAssociatedClause()};
bool hasCollapseClause{
clause ? (clause->Id() == llvm::omp::OMPC_collapse) : false};
+ const parser::OpenMPLoopConstruct *innerMostLoop = &x;
+
+ while (auto &optLoopCons{
+ std::get<std::optional<parser::NestedConstruct>>(x.t)}) {
+ if (const auto &innerLoop{
+ std::get_if < parser::OpenMPLoopConstruct >>> (innerMostLoop->t)}) {
+ innerMostLoop = &innerLoop.value().value();
+ }
+ }
- auto &optLoopCons = std::get<std::optional<parser::NestedConstruct>>(x.t);
if (optLoopCons.has_value()) {
- if (const auto &outer{std::get_if<parser::DoConstruct>(&*optLoopCons)}) {
+ if (const auto &outer{std::get_if<parser::DoConstruct>(innerMostLoop->t)}) {
for (const parser::DoConstruct *loop{&*outer}; loop && level > 0;
--level) {
if (loop->IsDoConcurrent()) {
diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
index 047baa3a79f5d..83db78667c7f8 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
@@ -209,6 +209,8 @@ struct ConstructDecompositionT {
bool applyClause(const tomp::clause::CollapseT<TypeTy, IdTy, ExprTy> &clause,
const ClauseTy *);
+ bool applyClause(const tomp::clause::SizesT<TypeTy, IdTy, ExprTy> &clause,
+ const ClauseTy *);
bool applyClause(const tomp::clause::PrivateT<TypeTy, IdTy, ExprTy> &clause,
const ClauseTy *);
bool
@@ -482,6 +484,28 @@ bool ConstructDecompositionT<C, H>::applyClause(
return false;
}
+// FIXME(JAN): Do the correct thing, but for now we'll do the same as collapse
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+ const tomp::clause::SizesT<TypeTy, IdTy, ExprTy> &clause,
+ const ClauseTy *node) {
+ // Apply "sizes" to the innermost directive. If it's not one that
+ // allows it flag an error.
+ if (!leafs.empty()) {
+ auto &last = leafs.back();
+
+ if (llvm::omp::isAllowedClauseForDirective(last.id, node->id, version)) {
+ last.clauses.push_back(node);
+ return true;
+ } else {
+ llvm::errs() << "** OVERRIDING isAllowedClauseForDirective **\n";
+ last.clauses.push_back(node);
+ return true;
+ }
+ }
+
+ return false;
+}
// PRIVATE
// [5.2:111:5-7]
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index f70659120e1e6..287690d652e58 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -2255,6 +2255,9 @@ class OpenMPIRBuilder {
/// Return the function that contains the region to be outlined.
Function *getFunction() const { return EntryBB->getParent(); }
+
+ /// Dump the info in a somewhat readable way
+ void dump();
};
/// Collection of regions that need to be outlined during finalization.
@@ -2275,6 +2278,9 @@ class OpenMPIRBuilder {
/// Add a new region that will be outlined later.
void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
+ /// Dump outline infos
+ void dumpOutlineInfos();
+
/// An ordered map of auto-generated variables to their unique names.
/// It stores variables with the following names: 1) ".gomp_critical_user_" +
/// <critical_section_name> + ".var" for "omp critical" directives; 2)
@@ -3907,6 +3913,9 @@ class CanonicalLoopInfo {
/// Invalidate this loop. That is, the underlying IR does not fulfill the
/// requirements of an OpenMP canonical loop anymore.
LLVM_ABI void invalidate();
+
+ /// Dump the info in a somewhat readable way
+ void dump();
};
/// ScanInfo holds the information to assist in lowering of Scan reduction.
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index c16b0dde1a3da..9a8453f0cb251 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -9144,6 +9144,15 @@ Error OpenMPIRBuilder::emitOffloadingArrays(
return Error::success();
}
+void OpenMPIRBuilder::dumpOutlineInfos() {
+ errs() << "=== Outline Infos Begin ===\n";
+ for (auto En : enumerate(OutlineInfos)) {
+ errs() << "[" << En.index() << "]: ";
+ En.value().dump();
+ }
+ errs() << "=== Outline Infos End ===\n";
+}
+
void OpenMPIRBuilder::emitBranch(BasicBlock *Target) {
BasicBlock *CurBB = Builder.GetInsertBlock();
@@ -10068,6 +10077,14 @@ void OpenMPIRBuilder::OutlineInfo::collectBlocks(
}
}
+void OpenMPIRBuilder::OutlineInfo::dump() {
+ errs() << "=== OutilneInfo == "
+ << " EntryBB: " << (EntryBB ? EntryBB->getName() : "n\a")
+ << " ExitBB: " << (ExitBB ? ExitBB->getName() : "n\a")
+ << " OuterAllocaBB: "
+ << (OuterAllocaBB ? OuterAllocaBB->getName() : "n/a") << "\n";
+}
+
void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr,
uint64_t Size, int32_t Flags,
GlobalValue::LinkageTypes,
@@ -10845,3 +10862,10 @@ void CanonicalLoopInfo::invalidate() {
Latch = nullptr;
Exit = nullptr;
}
+
+void CanonicalLoopInfo::dump() {
+ errs() << "CanonicaLoop == Header: " << (Header ? Header->getName() : "n/a")
+ << " Cond: " << (Cond ? Cond->getName() : "n/a")
+ << " Latch: " << (Latch ? Latch->getName() : "n/a")
+ << " Exit: " << (Exit ? Exit->getName() : "n/a") << "\n";
+}
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index bbd1ed6a3ab2d..7ad1e70cb6e75 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -810,7 +810,11 @@ void CodeExtractor::severSplitPHINodesOfExits() {
}
void CodeExtractor::splitReturnBlocks() {
- for (BasicBlock *Block : Blocks)
+ for (BasicBlock *Block : Blocks) {
+ if (!Block->getTerminator()) {
+ errs() << "====== No terminator in block: " << Block->getName()
+ << "======\n";
+ }
if (ReturnInst *RI = dyn_cast<ReturnInst>(Block->getTerminator())) {
BasicBlock *New =
Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret");
@@ -827,6 +831,7 @@ void CodeExtractor::splitReturnBlocks() {
DT->changeImmediateDominator(I, NewNode);
}
}
+ }
}
Function *CodeExtractor::constructFunctionDeclaration(
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
index 311c57fb4446c..eb836db890738 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
@@ -317,6 +317,38 @@ class OpenMP_DeviceClauseSkip<
def OpenMP_DeviceClause : OpenMP_DeviceClauseSkip<>;
+//===----------------------------------------------------------------------===//
+// V5.2: [XX.X] `collapse` clause
+//===----------------------------------------------------------------------===//
+
+class OpenMP_CollapseClauseSkip<
+ bit traits = false, bit arguments = false, bit assemblyFormat = false,
+ bit description = false, bit extraClassDeclaration = false
+ > : OpenMP_Clause<traits, arguments, assemblyFormat, description,
+ extraClassDeclaration> {
+ let arguments = (ins
+ DefaultValuedOptionalAttr<I64Attr, "1">:$num_collapse
+ );
+}
+
+def OpenMP_CollapseClause : OpenMP_CollapseClauseSkip<>;
+
+//===----------------------------------------------------------------------===//
+// V5.2: [xx.x] `sizes` clause
+//===----------------------------------------------------------------------===//
+
+class OpenMP_TileSizesClauseSkip<
+ bit traits = false, bit arguments = false, bit assemblyFormat = false,
+ bit description = false, bit extraClassDeclaration = false
+ > : OpenMP_Clause<traits, arguments, assemblyFormat, description,
+ extraClassDeclaration> {
+ let arguments = (ins
+ OptionalAttr<DenseI64ArrayAttr>:$tile_sizes
+ );
+}
+
+def OpenMP_TileSizesClause : OpenMP_TileSizesClauseSkip<>;
+
//===----------------------------------------------------------------------===//
// V5.2: [11.6.1] `dist_schedule` clause
//===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index be114ea4fb631..0b13c526b7f03 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -614,7 +614,9 @@ def WorkshareLoopWrapperOp : OpenMP_Op<"workshare.loop_wrapper", traits = [
def LoopNestOp : OpenMP_Op<"loop_nest", traits = [
RecursiveMemoryEffects, SameVariadicOperandSize
], clauses = [
- OpenMP_LoopRelatedClause
+ OpenMP_LoopRelatedClause,
+ OpenMP_CollapseClause,
+ OpenMP_TileSizesClause
], singleRegion = true> {
let summary = "rectangular loop nest";
let description = [{
diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp
index 34f372af1e4b5..bec17258d058f 100644
--- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp
+++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp
@@ -493,7 +493,8 @@ struct ParallelOpLowering : public OpRewritePattern<scf::ParallelOp> {
// Create loop nest and populate region with contents of scf.parallel.
auto loopOp = omp::LoopNestOp::create(
rewriter, parallelOp.getLoc(), parallelOp.getLowerBound(),
- parallelOp.getUpperBound(), parallelOp.getStep());
+ parallelOp.getUpperBound(), parallelOp.getStep(), false, 1,
+ nullptr);
rewriter.inlineRegionBefore(parallelOp.getRegion(), loopOp.getRegion(),
loopOp.getRegion().begin());
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index c1c1767ef90b0..0bef00fed1ba0 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -56,6 +56,11 @@ makeDenseBoolArrayAttr(MLIRContext *ctx, const ArrayRef<bool> boolArray) {
return boolArray.empty() ? nullptr : DenseBoolArrayAttr::get(ctx, boolArray);
}
+static DenseI64ArrayAttr
+makeDenseI64ArrayAttr(MLIRContext *ctx, const ArrayRef<int64_t> intArray) {
+ return intArray.empty() ? nullptr : DenseI64ArrayAttr::get(ctx, intArray);
+}
+
namespace {
struct MemRefPointerLikeModel
: public PointerLikeType::ExternalModel<MemRefPointerLikeModel,
@@ -2956,10 +2961,10 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) {
for (auto &iv : ivs)
iv.type = loopVarType;
+ auto ctx = parser.getBuilder().getContext();
// Parse "inclusive" flag.
if (succeeded(parser.parseOptionalKeyword("inclusive")))
- result.addAttribute("loop_inclusive",
- UnitAttr::get(parser.getBuilder().getContext()));
+ result.addAttribute("loop_inclusive", UnitAttr::get(ctx));
// Parse step values.
SmallVector<OpAsmParser::UnresolvedOperand> steps;
@@ -2967,6 +2972,38 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) {
parser.parseOperandList(steps, ivs.size(), OpAsmParser::Delimiter::Paren))
return failure();
+ // Parse collapse
+ int64_t value = 0;
+ if (!parser.parseOptionalKeyword("collapse") &&
+ (parser.parseLParen() || parser.parseInteger(value) ||
+ parser.parseRParen()))
+ return failure();
+ if (value > 1) {
+ result.addAttribute(
+ "num_collapse",
+ IntegerAttr::get(parser.getBuilder().getI64Type(), value));
+ }
+
+ // Parse tiles
+ SmallVector<int64_t> tiles;
+ auto parseTiles = [&]() -> ParseResult {
+ int64_t tile;
+ if (parser.parseInteger(tile))
+ return failure();
+ tiles.push_back(tile);
+ return success();
+ };
+
+ if (!parser.parseOptionalKeyword("tiles") &&
+ (parser.parseLParen() ||
+ parser.parseCommaSeparatedList(parseTiles) ||
+ parser.parseRParen()))
+ return failure();
+
+ if (tiles.size() > 0) {
+ result.addAttribute("tile_sizes", DenseI64ArrayAttr::get(ctx, tiles));
+ }
+
// Parse the body.
Region *region = result.addRegion();
if (parser.parseRegion(*region, ivs))
@@ -2990,14 +3027,23 @@ void LoopNestOp::print(OpAsmPrinter &p) {
if (getLoopInclusive())
p << "inclusive ";
p << "step (" << getLoopSteps() << ") ";
+ if (int64_t numCollapse = getNumCollapse()) {
+ if (numCollapse > 1)
+ p << "collapse(" << numCollapse << ") ";
+ }
+ if (const auto tiles = getTileSizes()) {
+ p << "tiles(" << tiles.value() << ") ";
+ }
p.printRegion(region, /*printEntryBlockArgs=*/false);
}
void LoopNestOp::build(OpBuilder &builder, OperationState &state,
const LoopNestOperands &clauses) {
+ MLIRContext *ctx = builder.getContext();
LoopNestOp::build(builder, state, clauses.loopLowerBounds,
clauses.loopUpperBounds, clauses.loopSteps,
- clauses.loopInclusive);
+ clauses.loopInclusive, clauses.numCollapse,
+ makeDenseI64ArrayAttr(ctx, clauses.tileSizes));
}
LogicalResult LoopNestOp::verify() {
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index eb96cb211fdd5..79ad778cc650e 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2966,10 +2966,9 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
/// Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
- LLVM::ModuleTranslation &moduleTranslation) {
+ LLVM::ModuleTranslation &moduleTranslation) {
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
auto loopOp = cast<omp::LoopNestOp>(opInst);
-
// Set up the source location value for OpenMP runtime.
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
@@ -3035,18 +3034,60 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
loopInfos.push_back(*loopResult);
}
- // Collapse loops. Store the insertion point because LoopInfos may get
- // invalidated.
+ // llvm::OpenMPIRBuilder::InsertPointTy afterIP = builder.saveIP();
llvm::OpenMPIRBuilder::InsertPointTy afterIP =
- loopInfos.front()->getAfterIP();
+ loopInfos.front()->getAfterIP();
- // Update the stack frame created for this loop to point to the resulting loop
- // after applying transformations.
- moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
- [&](OpenMPLoopInfoStackFrame &frame) {
- frame.loopInfo = ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
- return WalkResult::interrupt();
- });
+ // Initialize the new loop info to the current one, in case there
+ // are no loop transformations done.
+ llvm::CanonicalLoopInfo *NewTopLoopInfo = nullptr;
+
+ // Do tiling
+ if (const auto &tiles = loopOp.getTileSizes()) {
+ llvm::Type *IVType = loopInfos.front()->getIndVarType();
+ SmallVector<llvm::Value *> TileSizes;
+
+ for (auto tile : tiles.value()) {
+ llvm::Value *TileVal = llvm::ConstantInt::get(IVType, tile);
+ TileSizes.push_back(TileVal);
+ }
+
+ std::vector<llvm::CanonicalLoopInfo*> NewLoops =
+ ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes);
+
+ // Collapse loops. Store the insertion point because LoopInfos may get
+ // invalidated.
+ auto AfterBB = NewLoops.front()->getAfter();
+ auto AfterAfterBB = AfterBB->getSingleSuccessor();
+ afterIP = {AfterAfterBB, AfterAfterBB->begin()};
+ NewTopLoopInfo = NewLoops[0];
+
+ // Update the loop infos
+ loopInfos.clear();
+ for (const auto& newLoop : NewLoops) {
+ loopInfos.push_back(newLoop);
+ }
+ } // Tiling done
+
+ // Do collapse
+ if (const auto &numCollapse = loopOp.getNumCollapse()) {
+ SmallVector<llvm::CanonicalLoopInfo *> collapseLoopInfos(
+ loopInfos.begin(), loopInfos.begin() + (numCollapse));
+
+ auto newLoopInfo =
+ ompBuilder->collapseLoops(ompLoc.DL, collapseLoopInfos, {});
+ NewTopLoopInfo = newLoopInfo;
+ } // Collapse done
+
+ // Update the stack frame created for this loop to point to the resulting
+ // loop after applying transformations.
+ if (NewTopLoopInfo) {
+ moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
+ [&](OpenMPLoopInfoStackFrame &frame) {
+ frame.loopInfo = NewTopLoopInfo;
+ return WalkResult::interrupt();
+ });
+ }
// Continue building IR after the loop. Note that the LoopInfo returned by
// `collapseLoops` points inside the outermost loop and is intended for
>From dc9506e17038ac5cc85295626d3fb5bc96dfaa63 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Tue, 10 Jun 2025 09:51:02 -0400
Subject: [PATCH 02/26] Fix tests and limit the nesting of construct to only
tiling.
---
flang/lib/Semantics/canonicalize-omp.cpp | 34 ++++++++++++-------
.../Lower/OpenMP/parallel-wsloop-lastpriv.f90 | 4 +--
flang/test/Lower/OpenMP/simd.f90 | 2 +-
flang/test/Lower/OpenMP/wsloop-variable.f90 | 2 +-
flang/test/Semantics/OpenMP/do-collapse.f90 | 1 +
.../LLVMIR/omptarget-wsloop-collapsed.mlir | 2 +-
mlir/test/Target/LLVMIR/openmp-llvm.mlir | 12 +++----
7 files changed, 33 insertions(+), 24 deletions(-)
diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp
index fb0bb0f923574..10eaaa83f5f4f 100644
--- a/flang/lib/Semantics/canonicalize-omp.cpp
+++ b/flang/lib/Semantics/canonicalize-omp.cpp
@@ -149,27 +149,32 @@ class CanonicalizationOfOmp {
// Ignore compiler directives.
if (GetConstructIf<parser::CompilerDirective>(*nextIt))
continue;
-
// Keep track of the loops to handle the end loop directives
std::stack<parser::OpenMPLoopConstruct *> loops;
loops.push(&x);
- while (auto *innerConstruct{
+ if (auto *innerConstruct{
GetConstructIf<parser::OpenMPConstruct>(*nextIt)}) {
if (auto *innerOmpLoop{
std::get_if<parser::OpenMPLoopConstruct>(&innerConstruct->u)}) {
- std::get<
- std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
- loops.top()->t) = std::move(*innerOmpLoop);
- // Retrieveing the address so that DoConstruct or inner loop can be
- // set later.
- loops.push(&(std::get<std::optional<
- common::Indirection<parser::OpenMPLoopConstruct>>>(
- loops.top()->t)
- .value()
- .value()));
- nextIt = block.erase(nextIt);
+ auto &innerBeginDir{
+ std::get<parser::OmpBeginLoopDirective>(innerOmpLoop->t)};
+ auto &innerDir{std::get<parser::OmpLoopDirective>(innerBeginDir.t)};
+ if (innerDir.v == llvm::omp::Directive::OMPD_tile) {
+ std::get<std::optional<
+ common::Indirection<parser::OpenMPLoopConstruct>>>(
+ loops.top()->t) = std::move(*innerOmpLoop);
+ // Retrieveing the address so that DoConstruct or inner loop can be
+ // set later.
+ loops.push(&(std::get<std::optional<
+ common::Indirection<parser::OpenMPLoopConstruct>>>(
+ loops.top()->t)
+ .value()
+ .value()));
+ nextIt = block.erase(nextIt);
+ }
}
}
+
if (auto *doCons{GetConstructIf<parser::DoConstruct>(*nextIt)}) {
if (doCons->GetLoopControl()) {
// move DoConstruct
@@ -185,6 +190,9 @@ class CanonicalizationOfOmp {
loops.top()->t) = std::move(*endDir);
nextIt = block.erase(nextIt);
loops.pop();
+ } else {
+ // If there is a mismatch bail out.
+ break;
}
}
} else {
diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90
index 2890e78e9d17f..faf8f717f6308 100644
--- a/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90
+++ b/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90
@@ -108,7 +108,7 @@ subroutine omp_do_lastprivate_collapse2(a)
! CHECK-NEXT: %[[UB2:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref<i32>
! CHECK-NEXT: %[[STEP2:.*]] = arith.constant 1 : i32
! CHECK-NEXT: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[I_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[J_PVT_REF:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
- ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[LB1]], %[[LB2]]) to (%[[UB1]], %[[UB2]]) inclusive step (%[[STEP1]], %[[STEP2]]) {
+ ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[LB1]], %[[LB2]]) to (%[[UB1]], %[[UB2]]) inclusive step (%[[STEP1]], %[[STEP2]]) collapse(2) {
! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ea"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -174,7 +174,7 @@ subroutine omp_do_lastprivate_collapse3(a)
! CHECK-NEXT: %[[UB3:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref<i32>
! CHECK-NEXT: %[[STEP3:.*]] = arith.constant 1 : i32
! CHECK-NEXT: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[I_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[J_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[K_PVT_REF:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
- ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) : i32 = (%[[LB1]], %[[LB2]], %[[LB3]]) to (%[[UB1]], %[[UB2]], %[[UB3]]) inclusive step (%[[STEP1]], %[[STEP2]], %[[STEP3]]) {
+ ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) : i32 = (%[[LB1]], %[[LB2]], %[[LB3]]) to (%[[UB1]], %[[UB2]], %[[UB3]]) inclusive step (%[[STEP1]], %[[STEP2]], %[[STEP3]]) collapse(3) {
! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ea"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
diff --git a/flang/test/Lower/OpenMP/simd.f90 b/flang/test/Lower/OpenMP/simd.f90
index d815474b84b31..3572b9baff00b 100644
--- a/flang/test/Lower/OpenMP/simd.f90
+++ b/flang/test/Lower/OpenMP/simd.f90
@@ -175,7 +175,7 @@ subroutine simd_with_collapse_clause(n)
! CHECK-NEXT: omp.loop_nest (%[[ARG_0:.*]], %[[ARG_1:.*]]) : i32 = (
! CHECK-SAME: %[[LOWER_I]], %[[LOWER_J]]) to (
! CHECK-SAME: %[[UPPER_I]], %[[UPPER_J]]) inclusive step (
- ! CHECK-SAME: %[[STEP_I]], %[[STEP_J]]) {
+ ! CHECK-SAME: %[[STEP_I]], %[[STEP_J]]) collapse(2) {
!$OMP SIMD COLLAPSE(2)
do i = 1, n
do j = 1, n
diff --git a/flang/test/Lower/OpenMP/wsloop-variable.f90 b/flang/test/Lower/OpenMP/wsloop-variable.f90
index a7fb5fb8936e7..cceb77b974fee 100644
--- a/flang/test/Lower/OpenMP/wsloop-variable.f90
+++ b/flang/test/Lower/OpenMP/wsloop-variable.f90
@@ -23,7 +23,7 @@ program wsloop_variable
!CHECK: %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64
!CHECK: %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64
!CHECK: omp.wsloop private({{.*}}) {
-!CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) {
+!CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) collapse(2) {
!CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16
!CHECK: hlfir.assign %[[ARG0_I16]] to %[[STORE_IV0:.*]]#0 : i16, !fir.ref<i16>
!CHECK: hlfir.assign %[[ARG1]] to %[[STORE_IV1:.*]]#0 : i64, !fir.ref<i64>
diff --git a/flang/test/Semantics/OpenMP/do-collapse.f90 b/flang/test/Semantics/OpenMP/do-collapse.f90
index 480bd45b79b83..ec6a3bdad3686 100644
--- a/flang/test/Semantics/OpenMP/do-collapse.f90
+++ b/flang/test/Semantics/OpenMP/do-collapse.f90
@@ -31,6 +31,7 @@ program omp_doCollapse
end do
end do
+ !ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct.
!ERROR: At most one COLLAPSE clause can appear on the SIMD directive
!$omp simd collapse(2) collapse(1)
do i = 1, 4
diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
index 0ebcec0e0ec31..189e3bc57db86 100644
--- a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
@@ -9,7 +9,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
%loop_lb = llvm.mlir.constant(0 : i32) : i32
%loop_step = llvm.mlir.constant(1 : index) : i32
omp.wsloop {
- omp.loop_nest (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) {
+ omp.loop_nest (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) collapse(2) {
%1 = llvm.add %arg1, %arg2 : i32
%2 = llvm.mul %arg2, %loop_ub overflow<nsw> : i32
%3 = llvm.add %arg1, %2 :i32
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 3f4dcd5e24c56..27210bc0890ce 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -698,7 +698,7 @@ llvm.func @simd_simple(%lb : i64, %ub : i64, %step : i64, %arg0: !llvm.ptr) {
// CHECK-LABEL: @simd_simple_multiple
llvm.func @simd_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) {
omp.simd {
- omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) inclusive step (%step1, %step2) {
+ omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) inclusive step (%step1, %step2) collapse(2) {
%3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
// The form of the emitted IR is controlled by OpenMPIRBuilder and
// tested there. Just check that the right metadata is added and collapsed
@@ -736,7 +736,7 @@ llvm.func @simd_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64
// CHECK-LABEL: @simd_simple_multiple_simdlen
llvm.func @simd_simple_multiple_simdlen(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) {
omp.simd simdlen(2) {
- omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) {
+ omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) {
%3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
// The form of the emitted IR is controlled by OpenMPIRBuilder and
// tested there. Just check that the right metadata is added.
@@ -760,7 +760,7 @@ llvm.func @simd_simple_multiple_simdlen(%lb1 : i64, %ub1 : i64, %step1 : i64, %l
// CHECK-LABEL: @simd_simple_multiple_safelen
llvm.func @simd_simple_multiple_safelen(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) {
omp.simd safelen(2) {
- omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) {
+ omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) {
%3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
%4 = llvm.getelementptr %arg0[%iv1] : (!llvm.ptr, i64) -> !llvm.ptr, f32
%5 = llvm.getelementptr %arg1[%iv2] : (!llvm.ptr, i64) -> !llvm.ptr, f32
@@ -779,7 +779,7 @@ llvm.func @simd_simple_multiple_safelen(%lb1 : i64, %ub1 : i64, %step1 : i64, %l
// CHECK-LABEL: @simd_simple_multiple_simdlen_safelen
llvm.func @simd_simple_multiple_simdlen_safelen(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) {
omp.simd simdlen(1) safelen(2) {
- omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) {
+ omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) {
%3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
%4 = llvm.getelementptr %arg0[%iv1] : (!llvm.ptr, i64) -> !llvm.ptr, f32
%5 = llvm.getelementptr %arg1[%iv2] : (!llvm.ptr, i64) -> !llvm.ptr, f32
@@ -1177,7 +1177,7 @@ llvm.func @collapse_wsloop(
// CHECK: store i32 %[[TOTAL_SUB_1]], ptr
// CHECK: call void @__kmpc_for_static_init_4u
omp.wsloop {
- omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
+ omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) collapse(3) {
%31 = llvm.load %20 : !llvm.ptr -> i32
%32 = llvm.add %31, %arg0 : i32
%33 = llvm.add %32, %arg1 : i32
@@ -1239,7 +1239,7 @@ llvm.func @collapse_wsloop_dynamic(
// CHECK: store i32 %[[TOTAL]], ptr
// CHECK: call void @__kmpc_dispatch_init_4u
omp.wsloop schedule(dynamic) {
- omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
+ omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) collapse(3) {
%31 = llvm.load %20 : !llvm.ptr -> i32
%32 = llvm.add %31, %arg0 : i32
%33 = llvm.add %32, %arg1 : i32
>From 99f125eaaff3569c743bf29b043bdbd12526f034 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Tue, 10 Jun 2025 10:18:32 -0400
Subject: [PATCH 03/26] Enable stand-alone tiling, but it gives a warning and
converting to simd.
---
flang/lib/Lower/OpenMP/OpenMP.cpp | 42 ++++++++++++++++++---
flang/test/Lower/OpenMP/wsloop-collapse.f90 | 2 +-
2 files changed, 37 insertions(+), 7 deletions(-)
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index f7a3b08b7f54b..d21d045baf8b3 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -2267,6 +2267,39 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter,
// Apply unrolling to it
auto cli = canonLoop.getCli();
mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli);
+
+static mlir::omp::LoopOp
+genTiledLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
+ semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+ mlir::Location loc, const ConstructQueue &queue,
+ ConstructQueue::const_iterator item) {
+ mlir::omp::LoopOperands loopClauseOps;
+ llvm::SmallVector<const semantics::Symbol *> loopReductionSyms;
+ genLoopClauses(converter, semaCtx, item->clauses, loc, loopClauseOps,
+ loopReductionSyms);
+
+ DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
+ /*shouldCollectPreDeterminedSymbols=*/true,
+ /*useDelayedPrivatization=*/true, symTable);
+ dsp.processStep1(&loopClauseOps);
+
+ mlir::omp::LoopNestOperands loopNestClauseOps;
+ llvm::SmallVector<const semantics::Symbol *> iv;
+ genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
+ loopNestClauseOps, iv);
+
+ EntryBlockArgs loopArgs;
+ loopArgs.priv.syms = dsp.getDelayedPrivSymbols();
+ loopArgs.priv.vars = loopClauseOps.privateVars;
+ loopArgs.reduction.syms = loopReductionSyms;
+ loopArgs.reduction.vars = loopClauseOps.reductionVars;
+
+ auto loopOp =
+ genWrapperOp<mlir::omp::LoopOp>(converter, loc, loopClauseOps, loopArgs);
+ genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
+ loopNestClauseOps, iv, {{loopOp, loopArgs}},
+ llvm::omp::Directive::OMPD_loop, dsp);
+ return loopOp;
}
static mlir::omp::MaskedOp
@@ -3487,13 +3520,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
newOp = genTeamsOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue,
item);
break;
- case llvm::omp::Directive::OMPD_tile: {
- unsigned version = semaCtx.langOptions().OpenMPVersion;
- if (!semaCtx.langOptions().OpenMPSimd)
- TODO(loc, "Unhandled loop directive (" +
- llvm::omp::getOpenMPDirectiveName(dir, version) + ")");
+ case llvm::omp::Directive::OMPD_tile:
+ newOp =
+ genTiledLoopOp(converter, symTable, semaCtx, eval, loc, queue, item);
break;
- }
case llvm::omp::Directive::OMPD_unroll:
genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
break;
diff --git a/flang/test/Lower/OpenMP/wsloop-collapse.f90 b/flang/test/Lower/OpenMP/wsloop-collapse.f90
index 7ec40ab4b2f43..677c7809c397f 100644
--- a/flang/test/Lower/OpenMP/wsloop-collapse.f90
+++ b/flang/test/Lower/OpenMP/wsloop-collapse.f90
@@ -57,7 +57,7 @@ program wsloop_collapse
!CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
!CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32
!CHECK: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_4:.*]], @{{.*}} %{{.*}}#0 -> %[[VAL_2:.*]], @{{.*}} %{{.*}}#0 -> %[[VAL_0:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
-!CHECK-NEXT: omp.loop_nest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) {
+!CHECK-NEXT: omp.loop_nest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) collapse(3) {
!$omp do collapse(3)
do i = 1, a
do j= 1, b
>From 20b990397ee5ed525d68d7d1375af12709464aa7 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Wed, 11 Jun 2025 10:25:00 -0400
Subject: [PATCH 04/26] Add minimal test, remove debug print.
---
flang/test/Lower/OpenMP/wsloop-tile.f90 | 30 +++++++++++++++++++
.../Frontend/OpenMP/ConstructDecompositionT.h | 2 +-
2 files changed, 31 insertions(+), 1 deletion(-)
create mode 100644 flang/test/Lower/OpenMP/wsloop-tile.f90
diff --git a/flang/test/Lower/OpenMP/wsloop-tile.f90 b/flang/test/Lower/OpenMP/wsloop-tile.f90
new file mode 100644
index 0000000000000..f43b558ce46bb
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-tile.f90
@@ -0,0 +1,30 @@
+! This test checks lowering of OpenMP DO Directive(Worksharing) with collapse.
+
+! RUN: bbc -fopenmp -fopenmp-version=51 -emit-hlfir %s -o - | FileCheck %s
+
+!CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "wsloop_tile"} {
+program wsloop_tile
+ integer :: i, j, k
+ integer :: a, b, c
+ integer :: x
+
+ a=30
+ b=20
+ c=50
+ x=0
+
+ !CHECK: omp.loop_nest
+ !CHECK-SAME: tiles(2, 5, 10)
+
+ !$omp do
+ !$omp tile sizes(2,5,10)
+ do i = 1, a
+ do j= 1, b
+ do k = 1, c
+ x = x + i + j + k
+ end do
+ end do
+ end do
+ !$omp end tile
+ !$omp end do
+end program wsloop_tile
diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
index 83db78667c7f8..e1083b7ef2cd9 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
@@ -498,7 +498,7 @@ bool ConstructDecompositionT<C, H>::applyClause(
last.clauses.push_back(node);
return true;
} else {
- llvm::errs() << "** OVERRIDING isAllowedClauseForDirective **\n";
+ // llvm::errs() << "** OVERRIDING isAllowedClauseForDirective **\n";
last.clauses.push_back(node);
return true;
}
>From 6d56b99f6e285ef43e328f3a1175e242b55c15a8 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Fri, 13 Jun 2025 09:53:10 -0400
Subject: [PATCH 05/26] Fix formatting
---
flang/lib/Lower/OpenMP/OpenMP.cpp | 27 +++++++++++--------
flang/lib/Lower/OpenMP/Utils.cpp | 3 +--
flang/lib/Semantics/canonicalize-omp.cpp | 4 +--
mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 3 +--
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 12 ++++-----
5 files changed, 26 insertions(+), 23 deletions(-)
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index d21d045baf8b3..0ad7acee33f26 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -427,9 +427,10 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
if (innerOptional.has_value()) {
const auto &innerLoopDirective = innerOptional.value().value();
const auto &innerBegin =
- std::get<parser::OmpBeginLoopDirective>(innerLoopDirective.t);
+ std::get<parser::OmpBeginLoopDirective>(
+ innerLoopDirective.t);
const auto &innerDirective =
- std::get<parser::OmpLoopDirective>(innerBegin.t);
+ std::get<parser::OmpLoopDirective>(innerBegin.t);
if (innerDirective.v == llvm::omp::Directive::OMPD_tile) {
middleClauseList =
&std::get<parser::OmpClauseList>(innerBegin.t);
@@ -2268,11 +2269,13 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter,
auto cli = canonLoop.getCli();
mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli);
-static mlir::omp::LoopOp
-genTiledLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
- semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
- mlir::Location loc, const ConstructQueue &queue,
- ConstructQueue::const_iterator item) {
+static mlir::omp::LoopOp genTiledLoopOp(lower::AbstractConverter &converter,
+ lower::SymMap &symTable,
+ semantics::SemanticsContext &semaCtx,
+ lower::pft::Evaluation &eval,
+ mlir::Location loc,
+ const ConstructQueue &queue,
+ ConstructQueue::const_iterator item) {
mlir::omp::LoopOperands loopClauseOps;
llvm::SmallVector<const semantics::Symbol *> loopReductionSyms;
genLoopClauses(converter, semaCtx, item->clauses, loc, loopClauseOps,
@@ -3522,7 +3525,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
break;
case llvm::omp::Directive::OMPD_tile:
newOp =
- genTiledLoopOp(converter, symTable, semaCtx, eval, loc, queue, item);
+ genTiledLoopOp(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_unroll:
genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
@@ -3959,13 +3962,15 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
List<Clause> clauses = makeClauses(
std::get<parser::OmpClauseList>(beginLoopDirective.t), semaCtx);
- const auto &innerOptional = std::get<std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(loopConstruct.t);
+ const auto &innerOptional =
+ std::get<std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
+ loopConstruct.t);
if (innerOptional.has_value()) {
const auto &innerLoopDirective = innerOptional.value().value();
const auto &innerBegin =
- std::get<parser::OmpBeginLoopDirective>(innerLoopDirective.t);
+ std::get<parser::OmpBeginLoopDirective>(innerLoopDirective.t);
const auto &innerDirective =
- std::get<parser::OmpLoopDirective>(innerBegin.t);
+ std::get<parser::OmpLoopDirective>(innerBegin.t);
if (innerDirective.v == llvm::omp::Directive::OMPD_tile) {
clauses.append(
makeClauses(std::get<parser::OmpClauseList>(innerBegin.t), semaCtx));
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 11721d05001b0..69d74762ace6f 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -51,8 +51,7 @@ int64_t getCollapseValue(const List<Clause> &clauses) {
}
collapseValue = collapseValue - numTileSizes;
- int64_t result =
- collapseValue > numTileSizes ? collapseValue : numTileSizes;
+ int64_t result = collapseValue > numTileSizes ? collapseValue : numTileSizes;
return result;
}
diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp
index 10eaaa83f5f4f..c519cb43628ed 100644
--- a/flang/lib/Semantics/canonicalize-omp.cpp
+++ b/flang/lib/Semantics/canonicalize-omp.cpp
@@ -11,7 +11,7 @@
#include "flang/Parser/parse-tree.h"
#include "flang/Semantics/semantics.h"
-# include <stack>
+#include <stack>
// After Loop Canonicalization, rewrite OpenMP parse tree to make OpenMP
// Constructs more structured which provide explicit scopes for later
// structural checks and semantic analysis.
@@ -153,7 +153,7 @@ class CanonicalizationOfOmp {
std::stack<parser::OpenMPLoopConstruct *> loops;
loops.push(&x);
if (auto *innerConstruct{
- GetConstructIf<parser::OpenMPConstruct>(*nextIt)}) {
+ GetConstructIf<parser::OpenMPConstruct>(*nextIt)}) {
if (auto *innerOmpLoop{
std::get_if<parser::OpenMPLoopConstruct>(&innerConstruct->u)}) {
auto &innerBeginDir{
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 0bef00fed1ba0..e2b63afe48a23 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -2995,8 +2995,7 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) {
};
if (!parser.parseOptionalKeyword("tiles") &&
- (parser.parseLParen() ||
- parser.parseCommaSeparatedList(parseTiles) ||
+ (parser.parseLParen() || parser.parseCommaSeparatedList(parseTiles) ||
parser.parseRParen()))
return failure();
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 79ad778cc650e..c5b7cc76eb17f 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2966,7 +2966,7 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
/// Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
- LLVM::ModuleTranslation &moduleTranslation) {
+ LLVM::ModuleTranslation &moduleTranslation) {
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
auto loopOp = cast<omp::LoopNestOp>(opInst);
// Set up the source location value for OpenMP runtime.
@@ -3036,7 +3036,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
// llvm::OpenMPIRBuilder::InsertPointTy afterIP = builder.saveIP();
llvm::OpenMPIRBuilder::InsertPointTy afterIP =
- loopInfos.front()->getAfterIP();
+ loopInfos.front()->getAfterIP();
// Initialize the new loop info to the current one, in case there
// are no loop transformations done.
@@ -3048,12 +3048,12 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
SmallVector<llvm::Value *> TileSizes;
for (auto tile : tiles.value()) {
- llvm::Value *TileVal = llvm::ConstantInt::get(IVType, tile);
+ llvm::Value *TileVal = llvm::ConstantInt::get(IVType, tile);
TileSizes.push_back(TileVal);
}
- std::vector<llvm::CanonicalLoopInfo*> NewLoops =
- ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes);
+ std::vector<llvm::CanonicalLoopInfo *> NewLoops =
+ ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes);
// Collapse loops. Store the insertion point because LoopInfos may get
// invalidated.
@@ -3064,7 +3064,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
// Update the loop infos
loopInfos.clear();
- for (const auto& newLoop : NewLoops) {
+ for (const auto &newLoop : NewLoops) {
loopInfos.push_back(newLoop);
}
} // Tiling done
>From 69f0d94bee296d5c3719638bb2bad782727cb440 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Sat, 14 Jun 2025 06:29:58 -0400
Subject: [PATCH 06/26] Fix formatting
---
flang/lib/Semantics/canonicalize-omp.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp
index c519cb43628ed..1d00bdaad777c 100644
--- a/flang/lib/Semantics/canonicalize-omp.cpp
+++ b/flang/lib/Semantics/canonicalize-omp.cpp
@@ -166,10 +166,10 @@ class CanonicalizationOfOmp {
// Retrieveing the address so that DoConstruct or inner loop can be
// set later.
loops.push(&(std::get<std::optional<
- common::Indirection<parser::OpenMPLoopConstruct>>>(
+ common::Indirection<parser::OpenMPLoopConstruct>>>(
loops.top()->t)
- .value()
- .value()));
+ .value()
+ .value()));
nextIt = block.erase(nextIt);
}
}
>From 13ddc87015af7196cb4eaa46e700947efb08f010 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Thu, 19 Jun 2025 15:52:55 -0400
Subject: [PATCH 07/26] Fix test.
---
flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 | 1 +
1 file changed, 1 insertion(+)
diff --git a/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 b/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90
index bb1929249183b..355626f6e73b9 100644
--- a/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90
+++ b/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90
@@ -1,6 +1,7 @@
!RUN: %python %S/../test_errors.py %s %flang -fopenmp
integer :: i, j
+! ERROR: DO CONCURRENT loops cannot be used with the COLLAPSE clause.
!$omp parallel do collapse(2)
do i = 1, 1
! ERROR: DO CONCURRENT loops cannot form part of a loop nest.
>From 605620280f74b405d431f1e5de74f59dc748ba3d Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Fri, 20 Jun 2025 07:31:02 -0400
Subject: [PATCH 08/26] Add more mlir tests. Set collapse value when lowering
from SCF to OpenMP.
---
.../Conversion/SCFToOpenMP/SCFToOpenMP.cpp | 4 +-
mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 12 +++++
.../Conversion/SCFToOpenMP/scf-to-openmp.mlir | 2 +-
mlir/test/Dialect/OpenMP/invalid.mlir | 23 ++++++++
mlir/test/Dialect/OpenMP/ops.mlir | 54 +++++++++++++++++++
5 files changed, 92 insertions(+), 3 deletions(-)
diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp
index bec17258d058f..f056e72531bfc 100644
--- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp
+++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp
@@ -493,8 +493,8 @@ struct ParallelOpLowering : public OpRewritePattern<scf::ParallelOp> {
// Create loop nest and populate region with contents of scf.parallel.
auto loopOp = omp::LoopNestOp::create(
rewriter, parallelOp.getLoc(), parallelOp.getLowerBound(),
- parallelOp.getUpperBound(), parallelOp.getStep(), false, 1,
- nullptr);
+ parallelOp.getUpperBound(), parallelOp.getStep(), false,
+ parallelOp.getLowerBound().size(), nullptr);
rewriter.inlineRegionBefore(parallelOp.getRegion(), loopOp.getRegion(),
loopOp.getRegion().begin());
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index e2b63afe48a23..9f6f9d9b91d0a 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -3058,6 +3058,18 @@ LogicalResult LoopNestOp::verify() {
<< "range argument type does not match corresponding IV type";
}
+ uint64_t numIVs = getIVs().size();
+
+ if (const auto &numCollapse = getNumCollapse())
+ if (numCollapse > numIVs)
+ return emitOpError()
+ << "collapse value is larger than the number of loops";
+
+ if (const auto &tiles = getTileSizes())
+ if (tiles.value().size() > numIVs)
+ return emitOpError()
+ << "number of tilings is larger than the number of loops";
+
if (!llvm::dyn_cast_if_present<LoopWrapperInterface>((*this)->getParentOp()))
return emitOpError() << "expects parent op to be a loop wrapper";
diff --git a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir
index a722acbf2c347..d362bb6092419 100644
--- a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir
+++ b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir
@@ -6,7 +6,7 @@ func.func @parallel(%arg0: index, %arg1: index, %arg2: index,
// CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32
// CHECK: omp.parallel num_threads(%[[FOUR]] : i32) {
// CHECK: omp.wsloop {
- // CHECK: omp.loop_nest (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) {
+ // CHECK: omp.loop_nest (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) collapse(2) {
// CHECK: memref.alloca_scope
scf.parallel (%i, %j) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) {
// CHECK: "test.payload"(%[[LVAR1]], %[[LVAR2]]) : (index, index) -> ()
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index 5088f2dfa7d7a..c6b4ae02602d9 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -157,6 +157,29 @@ func.func @no_loops(%lb : index, %ub : index, %step : index) {
}
}
+// -----
+
+func.func @collapse_size(%lb : index, %ub : index, %step : index) {
+ omp.wsloop {
+ // expected-error at +1 {{collapse value is larger than the number of loops}}
+ omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) collapse(4) {
+ omp.yield
+ }
+ }
+}
+
+// -----
+
+func.func @tiles_length(%lb : index, %ub : index, %step : index) {
+ omp.wsloop {
+ // expected-error at +1 {{number of tilings is larger than the number of loops}}
+ omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) tiles(2, 4) {
+ omp.yield
+ }
+ }
+}
+
+
// -----
func.func @inclusive_not_a_clause(%lb : index, %ub : index, %step : index) {
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 8c846cde1a3ca..e627a86e69185 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -376,6 +376,60 @@ func.func @omp_loop_nest_pretty_multiple(%lb1 : i32, %ub1 : i32, %step1 : i32,
return
}
+// CHECK-LABEL: omp_loop_nest_pretty_multiple_collapse
+func.func @omp_loop_nest_pretty_multiple_collapse(%lb1 : i32, %ub1 : i32, %step1 : i32,
+ %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref<?xi32>) -> () {
+
+ omp.wsloop {
+ // CHECK: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) collapse(2)
+ omp.loop_nest (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) {
+ %1 = "test.payload"(%iv1) : (i32) -> (index)
+ %2 = "test.payload"(%iv2) : (i32) -> (index)
+ memref.store %iv1, %data1[%1] : memref<?xi32>
+ memref.store %iv2, %data1[%2] : memref<?xi32>
+ omp.yield
+ }
+ }
+
+ return
+}
+
+// CHECK-LABEL: omp_loop_nest_pretty_multiple_tiles
+func.func @omp_loop_nest_pretty_multiple_tiles(%lb1 : i32, %ub1 : i32, %step1 : i32,
+ %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref<?xi32>) -> () {
+
+ omp.wsloop {
+ // CHECK: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) tiles(5, 10)
+ omp.loop_nest (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) tiles(5, 10) {
+ %1 = "test.payload"(%iv1) : (i32) -> (index)
+ %2 = "test.payload"(%iv2) : (i32) -> (index)
+ memref.store %iv1, %data1[%1] : memref<?xi32>
+ memref.store %iv2, %data1[%2] : memref<?xi32>
+ omp.yield
+ }
+ }
+
+ return
+}
+
+// CHECK-LABEL: omp_loop_nest_pretty_multiple_collapse_tiles
+func.func @omp_loop_nest_pretty_multiple_collapse_tiles(%lb1 : i32, %ub1 : i32, %step1 : i32,
+ %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref<?xi32>) -> () {
+
+ omp.wsloop {
+ // CHECK: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) collapse(2) tiles(5, 10)
+ omp.loop_nest (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) tiles(5, 10) {
+ %1 = "test.payload"(%iv1) : (i32) -> (index)
+ %2 = "test.payload"(%iv2) : (i32) -> (index)
+ memref.store %iv1, %data1[%1] : memref<?xi32>
+ memref.store %iv2, %data1[%2] : memref<?xi32>
+ omp.yield
+ }
+ }
+
+ return
+}
+
// CHECK-LABEL: omp_wsloop
func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memref<i32>, %linear_var : i32, %chunk_var : i32) -> () {
>From 6ddacf549b58837dda0281591650614d2e42a820 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Fri, 20 Jun 2025 07:52:28 -0400
Subject: [PATCH 09/26] Use llvm::SmallVector instead of std::stack
---
flang/lib/Semantics/canonicalize-omp.cpp | 18 ++++++++----------
1 file changed, 8 insertions(+), 10 deletions(-)
diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp
index 1d00bdaad777c..5264ec25fd80c 100644
--- a/flang/lib/Semantics/canonicalize-omp.cpp
+++ b/flang/lib/Semantics/canonicalize-omp.cpp
@@ -10,8 +10,6 @@
#include "flang/Parser/parse-tree-visitor.h"
#include "flang/Parser/parse-tree.h"
#include "flang/Semantics/semantics.h"
-
-#include <stack>
// After Loop Canonicalization, rewrite OpenMP parse tree to make OpenMP
// Constructs more structured which provide explicit scopes for later
// structural checks and semantic analysis.
@@ -150,8 +148,8 @@ class CanonicalizationOfOmp {
if (GetConstructIf<parser::CompilerDirective>(*nextIt))
continue;
// Keep track of the loops to handle the end loop directives
- std::stack<parser::OpenMPLoopConstruct *> loops;
- loops.push(&x);
+ llvm::SmallVector<parser::OpenMPLoopConstruct *> loops;
+ loops.push_back(&x);
if (auto *innerConstruct{
GetConstructIf<parser::OpenMPConstruct>(*nextIt)}) {
if (auto *innerOmpLoop{
@@ -162,12 +160,12 @@ class CanonicalizationOfOmp {
if (innerDir.v == llvm::omp::Directive::OMPD_tile) {
std::get<std::optional<
common::Indirection<parser::OpenMPLoopConstruct>>>(
- loops.top()->t) = std::move(*innerOmpLoop);
+ loops.back()->t) = std::move(*innerOmpLoop);
// Retrieveing the address so that DoConstruct or inner loop can be
// set later.
- loops.push(&(std::get<std::optional<
+ loops.push_back(&(std::get<std::optional<
common::Indirection<parser::OpenMPLoopConstruct>>>(
- loops.top()->t)
+ loops.back()->t)
.value()
.value()));
nextIt = block.erase(nextIt);
@@ -180,16 +178,16 @@ class CanonicalizationOfOmp {
// move DoConstruct
std::get<std::optional<std::variant<parser::DoConstruct,
common::Indirection<parser::OpenMPLoopConstruct>>>>(
- loops.top()->t) = std::move(*doCons);
+ loops.back()->t) = std::move(*doCons);
nextIt = block.erase(nextIt);
// try to match OmpEndLoopDirective
while (nextIt != block.end() && !loops.empty()) {
if (auto *endDir{
GetConstructIf<parser::OmpEndLoopDirective>(*nextIt)}) {
std::get<std::optional<parser::OmpEndLoopDirective>>(
- loops.top()->t) = std::move(*endDir);
+ loops.back()->t) = std::move(*endDir);
nextIt = block.erase(nextIt);
- loops.pop();
+ loops.pop_back();
} else {
// If there is a mismatch bail out.
break;
>From b3a1da2b1d70203d8c51df2fb9f9790f81a6db79 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Sat, 21 Jun 2025 09:11:58 -0400
Subject: [PATCH 10/26] Improve test a bit to make sure IVs are used as
expected.
---
flang/test/Lower/OpenMP/wsloop-tile.f90 | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/flang/test/Lower/OpenMP/wsloop-tile.f90 b/flang/test/Lower/OpenMP/wsloop-tile.f90
index f43b558ce46bb..c9bf18e3b278d 100644
--- a/flang/test/Lower/OpenMP/wsloop-tile.f90
+++ b/flang/test/Lower/OpenMP/wsloop-tile.f90
@@ -13,7 +13,7 @@ program wsloop_tile
c=50
x=0
- !CHECK: omp.loop_nest
+ !CHECK: omp.loop_nest (%[[IV_0:.*]], %[[IV_1:.*]], %[[IV_2:.*]]) : i32
!CHECK-SAME: tiles(2, 5, 10)
!$omp do
@@ -21,6 +21,15 @@ program wsloop_tile
do i = 1, a
do j= 1, b
do k = 1, c
+ !CHECK: hlfir.assign %[[IV_0]] to %[[IV_0A:.*]] : i32
+ !CHECK: hlfir.assign %[[IV_1]] to %[[IV_1A:.*]] : i32
+ !CHECK: hlfir.assign %[[IV_2]] to %[[IV_2A:.*]] : i32
+ !CHECK: %[[IVV_0:.*]] = fir.load %[[IV_0A]]
+ !CHECK: %[[SUM0:.*]] = arith.addi %{{.*}}, %[[IVV_0]] : i32
+ !CHECK: %[[IVV_1:.*]] = fir.load %[[IV_1A]]
+ !CHECK: %[[SUM1:.*]] = arith.addi %[[SUM0]], %[[IVV_1]] : i32
+ !CHECK: %[[IVV_2:.*]] = fir.load %[[IV_2A]]
+ !CHECK: %[[SUM2:.*]] = arith.addi %[[SUM1]], %[[IVV_2]] : i32
x = x + i + j + k
end do
end do
>From cf6b1d6ee0dc1e26b794edf76851fc7607894e9a Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Sat, 21 Jun 2025 09:30:57 -0400
Subject: [PATCH 11/26] Fix comments to clarify canonicalization.
---
flang/lib/Semantics/canonicalize-omp.cpp | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp
index 5264ec25fd80c..d5b5b14d22dc2 100644
--- a/flang/lib/Semantics/canonicalize-omp.cpp
+++ b/flang/lib/Semantics/canonicalize-omp.cpp
@@ -119,13 +119,15 @@ class CanonicalizationOfOmp {
// ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t->
// OmpBeginLoopDirective t-> OmpLoopDirective
// [ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct u->
- /// OmpBeginLoopDirective t-> OmpLoopDirective t-> Tile v-> OMP_tile]
+ // OmpBeginLoopDirective t-> OmpLoopDirective t-> Tile v-> OMP_tile]
// ExecutableConstruct -> DoConstruct
- // [ExecutableConstruct -> OmpEndLoopDirective]
+ // [ExecutableConstruct -> OmpEndLoopDirective] (note: tile)
// ExecutableConstruct -> OmpEndLoopDirective (if available)
//
// After rewriting:
// ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t->
+ // [OpenMPLoopConstruct t -> OmpBeginLoopDirective -> OmpLoopDirective
+ // OmpEndLoopDirective] (note: tile)
// OmpBeginLoopDirective t -> OmpLoopDirective -> DoConstruct
// OmpEndLoopDirective (if available)
parser::Block::iterator nextIt;
>From 9d886dd26475bffc023cea5e750443a8184f4f16 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Sat, 21 Jun 2025 12:05:40 -0400
Subject: [PATCH 12/26] Special handling of tile directive when dealing with
start end end loop directives.
---
flang/lib/Semantics/canonicalize-omp.cpp | 31 ++++++++++++++++--------
1 file changed, 21 insertions(+), 10 deletions(-)
diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp
index d5b5b14d22dc2..a7749b5a81678 100644
--- a/flang/lib/Semantics/canonicalize-omp.cpp
+++ b/flang/lib/Semantics/canonicalize-omp.cpp
@@ -160,16 +160,13 @@ class CanonicalizationOfOmp {
std::get<parser::OmpBeginLoopDirective>(innerOmpLoop->t)};
auto &innerDir{std::get<parser::OmpLoopDirective>(innerBeginDir.t)};
if (innerDir.v == llvm::omp::Directive::OMPD_tile) {
- std::get<std::optional<
+ auto &innerLoop = std::get<std::optional<
common::Indirection<parser::OpenMPLoopConstruct>>>(
- loops.back()->t) = std::move(*innerOmpLoop);
+ loops.back()->t);
+ innerLoop = std::move(*innerOmpLoop);
// Retrieveing the address so that DoConstruct or inner loop can be
// set later.
- loops.push_back(&(std::get<std::optional<
- common::Indirection<parser::OpenMPLoopConstruct>>>(
- loops.back()->t)
- .value()
- .value()));
+ loops.push_back(&(innerLoop.value().value()));
nextIt = block.erase(nextIt);
}
}
@@ -186,9 +183,23 @@ class CanonicalizationOfOmp {
while (nextIt != block.end() && !loops.empty()) {
if (auto *endDir{
GetConstructIf<parser::OmpEndLoopDirective>(*nextIt)}) {
- std::get<std::optional<parser::OmpEndLoopDirective>>(
- loops.back()->t) = std::move(*endDir);
- nextIt = block.erase(nextIt);
+ auto &endOmpDirective{
+ std::get<parser::OmpLoopDirective>(endDir->t)};
+ auto &loopBegin{
+ std::get<parser::OmpBeginLoopDirective>(loops.back()->t)};
+ auto &loopDir{std::get<parser::OmpLoopDirective>(loopBegin.t)};
+
+ // If the directive is a tile we try to match the corresponding
+ // end tile if it exsists. If it is not a tile directive we
+ // always assign the end loop directive and fall back on the
+ // existing directive structure checks.
+ if (loopDir.v != llvm::omp::Directive::OMPD_tile ||
+ loopDir.v == endOmpDirective.v) {
+ std::get<std::optional<parser::OmpEndLoopDirective>>(
+ loops.back()->t) = std::move(*endDir);
+ nextIt = block.erase(nextIt);
+ }
+
loops.pop_back();
} else {
// If there is a mismatch bail out.
>From 323527d70c1c70250f80dbcc7c92a1aa745cfdae Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Sat, 21 Jun 2025 12:20:33 -0400
Subject: [PATCH 13/26] Inline functions.
---
flang/lib/Semantics/resolve-directives.cpp | 62 +++++++++-------------
1 file changed, 24 insertions(+), 38 deletions(-)
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 3ad0f73607b44..c0a8fc81b0bb9 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -827,14 +827,6 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor<llvm::omp::Directive> {
void CollectAssociatedLoopLevelsFromInnerLoopContruct(
const parser::OpenMPLoopConstruct &, llvm::SmallVector<std::int64_t> &,
llvm::SmallVector<const parser::OmpClause *> &);
- template <typename T>
- void CollectAssociatedLoopLevelFromClauseValue(
- const parser::OmpClause &clause, llvm::SmallVector<std::int64_t> &,
- llvm::SmallVector<const parser::OmpClause *> &);
- template <typename T>
- void CollectAssociatedLoopLevelFromClauseSize(const parser::OmpClause &,
- llvm::SmallVector<std::int64_t> &,
- llvm::SmallVector<const parser::OmpClause *> &);
void CollectAssociatedLoopLevelsFromClauses(const parser::OmpClauseList &,
llvm::SmallVector<std::int64_t> &,
llvm::SmallVector<const parser::OmpClause *> &);
@@ -2077,40 +2069,34 @@ void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromInnerLoopContruct(
}
}
-template <typename T>
-void OmpAttributeVisitor::CollectAssociatedLoopLevelFromClauseValue(
- const parser::OmpClause &clause, llvm::SmallVector<std::int64_t> &levels,
- llvm::SmallVector<const parser::OmpClause *> &clauses) {
- if (const auto tclause{std::get_if<T>(&clause.u)}) {
- std::int64_t level = 0;
- if (const auto v{EvaluateInt64(context_, tclause->v)}) {
- level = *v;
- }
- levels.push_back(level);
- clauses.push_back(&clause);
- }
-}
-
-template <typename T>
-void OmpAttributeVisitor::CollectAssociatedLoopLevelFromClauseSize(
- const parser::OmpClause &clause, llvm::SmallVector<std::int64_t> &levels,
- llvm::SmallVector<const parser::OmpClause *> &clauses) {
- if (const auto tclause{std::get_if<T>(&clause.u)}) {
- levels.push_back(tclause->v.size());
- clauses.push_back(&clause);
- }
-}
-
void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromClauses(
const parser::OmpClauseList &x, llvm::SmallVector<std::int64_t> &levels,
llvm::SmallVector<const parser::OmpClause *> &clauses) {
for (const auto &clause : x.v) {
- CollectAssociatedLoopLevelFromClauseValue<parser::OmpClause::Ordered>(
- clause, levels, clauses);
- CollectAssociatedLoopLevelFromClauseValue<parser::OmpClause::Collapse>(
- clause, levels, clauses);
- CollectAssociatedLoopLevelFromClauseSize<parser::OmpClause::Sizes>(
- clause, levels, clauses);
+ if (const auto oclause{
+ std::get_if<parser::OmpClause::Ordered>(&clause.u)}) {
+ std::int64_t level = 0;
+ if (const auto v{EvaluateInt64(context_, oclause->v)}) {
+ level = *v;
+ }
+ levels.push_back(level);
+ clauses.push_back(&clause);
+ }
+
+ if (const auto cclause{
+ std::get_if<parser::OmpClause::Collapse>(&clause.u)}) {
+ std::int64_t level = 0;
+ if (const auto v{EvaluateInt64(context_, cclause->v)}) {
+ level = *v;
+ }
+ levels.push_back(level);
+ clauses.push_back(&clause);
+ }
+
+ if (const auto tclause{std::get_if<parser::OmpClause::Sizes>(&clause.u)}) {
+ levels.push_back(tclause->v.size());
+ clauses.push_back(&clause);
+ }
}
}
>From d87797c20ff33d32719ef9d5e2fa754e3b6309ee Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Mon, 23 Jun 2025 11:03:50 -0400
Subject: [PATCH 14/26] Remove debug code.
---
llvm/lib/Transforms/Utils/CodeExtractor.cpp | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 7ad1e70cb6e75..bbd1ed6a3ab2d 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -810,11 +810,7 @@ void CodeExtractor::severSplitPHINodesOfExits() {
}
void CodeExtractor::splitReturnBlocks() {
- for (BasicBlock *Block : Blocks) {
- if (!Block->getTerminator()) {
- errs() << "====== No terminator in block: " << Block->getName()
- << "======\n";
- }
+ for (BasicBlock *Block : Blocks)
if (ReturnInst *RI = dyn_cast<ReturnInst>(Block->getTerminator())) {
BasicBlock *New =
Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret");
@@ -831,7 +827,6 @@ void CodeExtractor::splitReturnBlocks() {
DT->changeImmediateDominator(I, NewNode);
}
}
- }
}
Function *CodeExtractor::constructFunctionDeclaration(
>From 8c70c882bbb382254772a7e4b5667c91df25ad86 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Mon, 23 Jun 2025 11:08:42 -0400
Subject: [PATCH 15/26] Reuse loop op lowering, add comment.
---
flang/lib/Lower/OpenMP/OpenMP.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 0ad7acee33f26..316d4c40adfd2 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -3525,7 +3525,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
break;
case llvm::omp::Directive::OMPD_tile:
newOp =
- genTiledLoopOp(converter, symTable, semaCtx, eval, loc, queue, item);
+ genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_unroll:
genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
>From 531fe648ed06089961ac58fc72bafc30998d1b1e Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Mon, 23 Jun 2025 11:12:51 -0400
Subject: [PATCH 16/26] Fix formatting.
---
flang/lib/Lower/OpenMP/OpenMP.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 316d4c40adfd2..373562b6f399b 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -3524,8 +3524,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
item);
break;
case llvm::omp::Directive::OMPD_tile:
- newOp =
- genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item);
+ newOp = genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_unroll:
genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
>From 2923141e9779c4b12590ccc2bc3cbc93e1eb7d61 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Mon, 23 Jun 2025 11:21:03 -0400
Subject: [PATCH 17/26] Remove curly braces.
---
mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 9f6f9d9b91d0a..b235671cc7be7 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -2978,11 +2978,10 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) {
(parser.parseLParen() || parser.parseInteger(value) ||
parser.parseRParen()))
return failure();
- if (value > 1) {
+ if (value > 1)
result.addAttribute(
"num_collapse",
IntegerAttr::get(parser.getBuilder().getI64Type(), value));
- }
// Parse tiles
SmallVector<int64_t> tiles;
@@ -2999,9 +2998,8 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) {
parser.parseRParen()))
return failure();
- if (tiles.size() > 0) {
+ if (tiles.size() > 0)
result.addAttribute("tile_sizes", DenseI64ArrayAttr::get(ctx, tiles));
- }
// Parse the body.
Region *region = result.addRegion();
@@ -3026,13 +3024,13 @@ void LoopNestOp::print(OpAsmPrinter &p) {
if (getLoopInclusive())
p << "inclusive ";
p << "step (" << getLoopSteps() << ") ";
- if (int64_t numCollapse = getNumCollapse()) {
+ if (int64_t numCollapse = getNumCollapse())
if (numCollapse > 1)
p << "collapse(" << numCollapse << ") ";
- }
- if (const auto tiles = getTileSizes()) {
+
+ if (const auto tiles = getTileSizes())
p << "tiles(" << tiles.value() << ") ";
- }
+
p.printRegion(region, /*printEntryBlockArgs=*/false);
}
>From 5939e185a7249aa16c359734ae5dd321e020734a Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Wed, 25 Jun 2025 10:11:36 -0400
Subject: [PATCH 18/26] Avoid attaching the sizes clause to the parent
construct, instead find the tile sizes through the parse tree when getting
the information needed to create the loop nest ops.
---
flang/lib/Lower/OpenMP/OpenMP.cpp | 23 ++---
flang/lib/Lower/OpenMP/Utils.cpp | 90 ++++++++++++++++++-
flang/lib/Lower/OpenMP/Utils.h | 5 ++
.../Frontend/OpenMP/ConstructDecompositionT.h | 4 -
4 files changed, 99 insertions(+), 23 deletions(-)
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 373562b6f399b..a16947ae5120d 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -47,6 +47,7 @@
using namespace Fortran::lower::omp;
using namespace Fortran::common::openmp;
+using namespace Fortran::semantics;
//===----------------------------------------------------------------------===//
// Code generation helper functions
@@ -1690,6 +1691,7 @@ genLoopNestClauses(lower::AbstractConverter &converter,
int64_t collapseValue = evaluate::ToInt64(collapse.v).value();
clauseOps.numCollapse = firOpBuilder.getI64IntegerAttr(collapseValue);
} else if (clause.id == llvm::omp::Clause::OMPC_sizes) {
+ // This case handles the stand-alone tiling construct
const auto &sizes = std::get<clause::Sizes>(clause.u);
llvm::SmallVector<int64_t> sizeValues;
for (auto &size : sizes.v) {
@@ -1699,6 +1701,12 @@ genLoopNestClauses(lower::AbstractConverter &converter,
clauseOps.tileSizes = sizeValues;
}
}
+
+ llvm::SmallVector<int64_t> sizeValues;
+ auto *ompCons{eval.getIf<parser::OpenMPConstruct>()};
+ collectTileSizesFromOpenMPConstruct (ompCons, sizeValues, semaCtx);
+ if (sizeValues.size() > 0)
+ clauseOps.tileSizes = sizeValues;
}
static void genLoopClauses(
@@ -3961,21 +3969,6 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
List<Clause> clauses = makeClauses(
std::get<parser::OmpClauseList>(beginLoopDirective.t), semaCtx);
- const auto &innerOptional =
- std::get<std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
- loopConstruct.t);
- if (innerOptional.has_value()) {
- const auto &innerLoopDirective = innerOptional.value().value();
- const auto &innerBegin =
- std::get<parser::OmpBeginLoopDirective>(innerLoopDirective.t);
- const auto &innerDirective =
- std::get<parser::OmpLoopDirective>(innerBegin.t);
- if (innerDirective.v == llvm::omp::Directive::OMPD_tile) {
- clauses.append(
- makeClauses(std::get<parser::OmpClauseList>(innerBegin.t), semaCtx));
- }
- }
-
if (auto &endLoopDirective =
std::get<std::optional<parser::OmpEndLoopDirective>>(
loopConstruct.t)) {
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 69d74762ace6f..e7fa9063b7ae2 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -14,6 +14,7 @@
#include "ClauseFinder.h"
#include "flang/Lower/OpenMP/Clauses.h"
+#include "flang/Evaluate/fold.h"
#include <flang/Lower/AbstractConverter.h>
#include <flang/Lower/ConvertType.h>
#include <flang/Lower/DirectivesCommon.h>
@@ -24,10 +25,31 @@
#include <flang/Parser/parse-tree.h>
#include <flang/Parser/tools.h>
#include <flang/Semantics/tools.h>
+#include <flang/Semantics/type.h>
#include <llvm/Support/CommandLine.h>
#include <iterator>
+using namespace Fortran::semantics;
+
+template <typename T>
+MaybeIntExpr
+EvaluateIntExpr(SemanticsContext &context, const T &expr) {
+ if (MaybeExpr maybeExpr{
+ Fold(context.foldingContext(), AnalyzeExpr(context, expr))}) {
+ if (auto *intExpr{Fortran::evaluate::UnwrapExpr<SomeIntExpr>(*maybeExpr)}) {
+ return std::move(*intExpr);
+ }
+ }
+ return std::nullopt;
+}
+
+template <typename T>
+std::optional<std::int64_t>
+EvaluateInt64(SemanticsContext &context, const T &expr) {
+ return Fortran::evaluate::ToInt64(EvaluateIntExpr(context, expr));
+}
+
llvm::cl::opt<bool> treatIndexAsSection(
"openmp-treat-index-as-section",
llvm::cl::desc("In the OpenMP data clauses treat `a(N)` as `a(N:N)`."),
@@ -615,6 +637,43 @@ static void convertLoopBounds(lower::AbstractConverter &converter,
}
}
+// Populates the sizes vector with values if the given OpenMPConstruct
+// Contains a loop construct with an inner tiling construct.
+void collectTileSizesFromOpenMPConstruct(
+ const parser::OpenMPConstruct *ompCons,
+ llvm::SmallVectorImpl<int64_t> &tileSizes,
+ SemanticsContext &semaCtx) {
+ if (!ompCons)
+ return;
+
+ if (auto *ompLoop{std::get_if<parser::OpenMPLoopConstruct>(&ompCons->u)}) {
+ const auto &innerOptional = std::get<
+ std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
+ ompLoop->t);
+ if (innerOptional.has_value()) {
+ const auto &innerLoopDirective = innerOptional.value().value();
+ const auto &innerBegin =
+ std::get<parser::OmpBeginLoopDirective>(innerLoopDirective.t);
+ const auto &innerDirective =
+ std::get<parser::OmpLoopDirective>(innerBegin.t).v;
+
+ if (innerDirective == llvm::omp::Directive::OMPD_tile) {
+ // Get the size values from parse tree and convert to a vector
+ const auto &innerClauseList{
+ std::get<parser::OmpClauseList>(innerBegin.t)};
+ for (const auto &clause : innerClauseList.v)
+ if (const auto tclause{
+ std::get_if<parser::OmpClause::Sizes>(&clause.u)}) {
+ for (auto &tval : tclause->v) {
+ if (const auto v{EvaluateInt64(semaCtx, tval)})
+ tileSizes.push_back(*v);
+ }
+ }
+ }
+ }
+ }
+}
+
bool collectLoopRelatedInfo(
lower::AbstractConverter &converter, mlir::Location currentLocation,
lower::pft::Evaluation &eval, const omp::List<omp::Clause> &clauses,
@@ -636,11 +695,34 @@ bool collectLoopRelatedInfo(
collapseValue = evaluate::ToInt64(clause->v).value();
found = true;
}
+
+ // Collect sizes from tile directive if present
std::int64_t sizesLengthValue = 0l;
- if (auto *clause =
- ClauseFinder::findUniqueClause<omp::clause::Sizes>(clauses)) {
- sizesLengthValue = clause->v.size();
- found = true;
+ if (auto *ompCons{eval.getIf<parser::OpenMPConstruct>()}) {
+ if (auto *ompLoop{std::get_if<parser::OpenMPLoopConstruct>(&ompCons->u)}) {
+ const auto &innerOptional = std::get<
+ std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
+ ompLoop->t);
+ if (innerOptional.has_value()) {
+ const auto &innerLoopDirective = innerOptional.value().value();
+ const auto &innerBegin =
+ std::get<parser::OmpBeginLoopDirective>(innerLoopDirective.t);
+ const auto &innerDirective =
+ std::get<parser::OmpLoopDirective>(innerBegin.t).v;
+
+ if (innerDirective == llvm::omp::Directive::OMPD_tile) {
+ // Get the size values from parse tree and convert to a vector
+ const auto &innerClauseList{
+ std::get<parser::OmpClauseList>(innerBegin.t)};
+ for (const auto &clause : innerClauseList.v)
+ if (const auto tclause{
+ std::get_if<parser::OmpClause::Sizes>(&clause.u)}) {
+ sizesLengthValue = tclause->v.size();
+ found = true;
+ }
+ }
+ }
+ }
}
collapseValue = collapseValue - sizesLengthValue;
diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h
index 60f44a7f0610c..bb42fb02efc09 100644
--- a/flang/lib/Lower/OpenMP/Utils.h
+++ b/flang/lib/Lower/OpenMP/Utils.h
@@ -175,6 +175,11 @@ bool collectLoopRelatedInfo(
mlir::omp::LoopRelatedClauseOps &result,
llvm::SmallVectorImpl<const semantics::Symbol *> &iv);
+void collectTileSizesFromOpenMPConstruct(
+ const parser::OpenMPConstruct *ompCons,
+ llvm::SmallVectorImpl<int64_t> &tileSizes,
+ Fortran::semantics::SemanticsContext &semaCtx);
+
} // namespace omp
} // namespace lower
} // namespace Fortran
diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
index e1083b7ef2cd9..5bb1f3f36b65e 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
@@ -497,10 +497,6 @@ bool ConstructDecompositionT<C, H>::applyClause(
if (llvm::omp::isAllowedClauseForDirective(last.id, node->id, version)) {
last.clauses.push_back(node);
return true;
- } else {
- // llvm::errs() << "** OVERRIDING isAllowedClauseForDirective **\n";
- last.clauses.push_back(node);
- return true;
}
}
>From 583e04294c0b14d1a134141bc542ffd097268293 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Wed, 25 Jun 2025 10:33:33 -0400
Subject: [PATCH 19/26] Fix formatting
---
flang/lib/Lower/OpenMP/OpenMP.cpp | 2 +-
flang/lib/Lower/OpenMP/Utils.cpp | 10 ++++------
2 files changed, 5 insertions(+), 7 deletions(-)
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index a16947ae5120d..3a9429b3c3ef8 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1704,7 +1704,7 @@ genLoopNestClauses(lower::AbstractConverter &converter,
llvm::SmallVector<int64_t> sizeValues;
auto *ompCons{eval.getIf<parser::OpenMPConstruct>()};
- collectTileSizesFromOpenMPConstruct (ompCons, sizeValues, semaCtx);
+ collectTileSizesFromOpenMPConstruct(ompCons, sizeValues, semaCtx);
if (sizeValues.size() > 0)
clauseOps.tileSizes = sizeValues;
}
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index e7fa9063b7ae2..e04a6eae98408 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -33,8 +33,7 @@
using namespace Fortran::semantics;
template <typename T>
-MaybeIntExpr
-EvaluateIntExpr(SemanticsContext &context, const T &expr) {
+MaybeIntExpr EvaluateIntExpr(SemanticsContext &context, const T &expr) {
if (MaybeExpr maybeExpr{
Fold(context.foldingContext(), AnalyzeExpr(context, expr))}) {
if (auto *intExpr{Fortran::evaluate::UnwrapExpr<SomeIntExpr>(*maybeExpr)}) {
@@ -45,8 +44,8 @@ EvaluateIntExpr(SemanticsContext &context, const T &expr) {
}
template <typename T>
-std::optional<std::int64_t>
-EvaluateInt64(SemanticsContext &context, const T &expr) {
+std::optional<std::int64_t> EvaluateInt64(SemanticsContext &context,
+ const T &expr) {
return Fortran::evaluate::ToInt64(EvaluateIntExpr(context, expr));
}
@@ -641,8 +640,7 @@ static void convertLoopBounds(lower::AbstractConverter &converter,
// Contains a loop construct with an inner tiling construct.
void collectTileSizesFromOpenMPConstruct(
const parser::OpenMPConstruct *ompCons,
- llvm::SmallVectorImpl<int64_t> &tileSizes,
- SemanticsContext &semaCtx) {
+ llvm::SmallVectorImpl<int64_t> &tileSizes, SemanticsContext &semaCtx) {
if (!ompCons)
return;
>From 219595f1806a30b388294f99db3c21f7c4e1eefb Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Thu, 26 Jun 2025 10:03:13 -0400
Subject: [PATCH 20/26] Fix unparse and add a test for nested loop constructs.
---
flang/test/Parser/OpenMP/do-tile-size.f90 | 29 +++++++++++++++++++++++
1 file changed, 29 insertions(+)
create mode 100644 flang/test/Parser/OpenMP/do-tile-size.f90
diff --git a/flang/test/Parser/OpenMP/do-tile-size.f90 b/flang/test/Parser/OpenMP/do-tile-size.f90
new file mode 100644
index 0000000000000..886ee4a2a680c
--- /dev/null
+++ b/flang/test/Parser/OpenMP/do-tile-size.f90
@@ -0,0 +1,29 @@
+! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck --ignore-case %s
+! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck --check-prefix="PARSE-TREE" %s
+
+subroutine openmp_do_tiles(x)
+
+ integer, intent(inout)::x
+
+
+!CHECK: !$omp do
+!CHECK: !$omp tile sizes
+!$omp do
+!$omp tile sizes(2)
+!CHECK: do
+ do x = 1, 100
+ call F1()
+!CHECK: end do
+ end do
+!CHECK: !$omp end tile
+!$omp end tile
+!$omp end do
+
+!PARSE-TREE:| | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+!PARSE-TREE:| | | OmpBeginLoopDirective
+!PARSE-TREE:| | | OpenMPLoopConstruct
+!PARSE-TREE:| | | | OmpBeginLoopDirective
+!PARSE-TREE:| | | | | OmpLoopDirective -> llvm::omp::Directive = tile
+!PARSE-TREE:| | | | | OmpClauseList -> OmpClause -> Sizes -> Scalar -> Integer -> Expr = '2_4'
+!PARSE-TREE: | | | | DoConstruct
+END subroutine openmp_do_tiles
>From 356c1667181a843c090f02caf02deb7ae139bf5d Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Thu, 26 Jun 2025 10:50:08 -0400
Subject: [PATCH 21/26] Use more convenient function to get
OpenMPLoopConstruct. Fix comments.
---
flang/lib/Semantics/canonicalize-omp.cpp | 30 ++++++++-----------
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 9 +++---
2 files changed, 17 insertions(+), 22 deletions(-)
diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp
index a7749b5a81678..79630b564e51a 100644
--- a/flang/lib/Semantics/canonicalize-omp.cpp
+++ b/flang/lib/Semantics/canonicalize-omp.cpp
@@ -152,23 +152,19 @@ class CanonicalizationOfOmp {
// Keep track of the loops to handle the end loop directives
llvm::SmallVector<parser::OpenMPLoopConstruct *> loops;
loops.push_back(&x);
- if (auto *innerConstruct{
- GetConstructIf<parser::OpenMPConstruct>(*nextIt)}) {
- if (auto *innerOmpLoop{
- std::get_if<parser::OpenMPLoopConstruct>(&innerConstruct->u)}) {
- auto &innerBeginDir{
- std::get<parser::OmpBeginLoopDirective>(innerOmpLoop->t)};
- auto &innerDir{std::get<parser::OmpLoopDirective>(innerBeginDir.t)};
- if (innerDir.v == llvm::omp::Directive::OMPD_tile) {
- auto &innerLoop = std::get<std::optional<
- common::Indirection<parser::OpenMPLoopConstruct>>>(
- loops.back()->t);
- innerLoop = std::move(*innerOmpLoop);
- // Retrieveing the address so that DoConstruct or inner loop can be
- // set later.
- loops.push_back(&(innerLoop.value().value()));
- nextIt = block.erase(nextIt);
- }
+ if (auto *innerOmpLoop{GetOmpIf<parser::OpenMPLoopConstruct>(*nextIt)}) {
+ auto &innerBeginDir{
+ std::get<parser::OmpBeginLoopDirective>(innerOmpLoop->t)};
+ auto &innerDir{std::get<parser::OmpLoopDirective>(innerBeginDir.t)};
+ if (innerDir.v == llvm::omp::Directive::OMPD_tile) {
+ auto &innerLoop = std::get<
+ std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
+ loops.back()->t);
+ innerLoop = std::move(*innerOmpLoop);
+ // Retrieveing the address so that DoConstruct or inner loop can be
+ // set later.
+ loops.push_back(&(innerLoop.value().value()));
+ nextIt = block.erase(nextIt);
}
}
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index c5b7cc76eb17f..a0effac22bd35 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3034,7 +3034,6 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
loopInfos.push_back(*loopResult);
}
- // llvm::OpenMPIRBuilder::InsertPointTy afterIP = builder.saveIP();
llvm::OpenMPIRBuilder::InsertPointTy afterIP =
loopInfos.front()->getAfterIP();
@@ -3055,10 +3054,10 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
std::vector<llvm::CanonicalLoopInfo *> NewLoops =
ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes);
- // Collapse loops. Store the insertion point because LoopInfos may get
- // invalidated.
- auto AfterBB = NewLoops.front()->getAfter();
- auto AfterAfterBB = AfterBB->getSingleSuccessor();
+ // Update afterIP to get the correct insertion point after
+ // tiling.
+ llvm::BasicBlock *AfterBB = NewLoops.front()->getAfter();
+ llvm::BasicBlock *AfterAfterBB = AfterBB->getSingleSuccessor();
afterIP = {AfterAfterBB, AfterAfterBB->begin()};
NewTopLoopInfo = NewLoops[0];
>From 955ccf6f67afcc5366eed8c8fa9b589acd328cd0 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Thu, 26 Jun 2025 10:54:46 -0400
Subject: [PATCH 22/26] Fix formatting.
---
.../Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index a0effac22bd35..40864d5e6c99e 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3057,7 +3057,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
// Update afterIP to get the correct insertion point after
// tiling.
llvm::BasicBlock *AfterBB = NewLoops.front()->getAfter();
- llvm::BasicBlock *AfterAfterBB = AfterBB->getSingleSuccessor();
+ llvm::BasicBlock *AfterAfterBB = AfterBB->getSingleSuccessor();
afterIP = {AfterAfterBB, AfterAfterBB->begin()};
NewTopLoopInfo = NewLoops[0];
>From a0501d0dba534f27888f76f21087c6032b5e3b9b Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Sat, 9 Aug 2025 10:53:23 -0400
Subject: [PATCH 23/26] Fix merge problems related to the different
representations used for nested loop constructs.
---
flang/lib/Lower/OpenMP/OpenMP.cpp | 52 +++++-----------------
flang/lib/Lower/OpenMP/Utils.cpp | 28 +++++++-----
flang/lib/Semantics/canonicalize-omp.cpp | 10 +++--
flang/lib/Semantics/resolve-directives.cpp | 34 ++++++++------
4 files changed, 56 insertions(+), 68 deletions(-)
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 3a9429b3c3ef8..4d6ba34193a54 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -422,14 +422,19 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
// FIXME(JAN): For now we check if there is an inner
// OpenMPLoopConstruct, and extract the size clause from there
- const auto &innerOptional = std::get<std::optional<
- common::Indirection<parser::OpenMPLoopConstruct>>>(
- ompConstruct.t);
- if (innerOptional.has_value()) {
- const auto &innerLoopDirective = innerOptional.value().value();
+ const auto &nestedOptional =
+ std::get<std::optional<parser::NestedConstruct>>(
+ ompConstruct.t);
+ assert(nestedOptional.has_value() &&
+ "Expected a DoConstruct or OpenMPLoopConstruct");
+ const auto *innerConstruct =
+ std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>(
+ &(nestedOptional.value()));
+ if (innerConstruct) {
+ const auto &innerLoopConstruct = innerConstruct->value();
const auto &innerBegin =
std::get<parser::OmpBeginLoopDirective>(
- innerLoopDirective.t);
+ innerLoopConstruct.t);
const auto &innerDirective =
std::get<parser::OmpLoopDirective>(innerBegin.t);
if (innerDirective.v == llvm::omp::Directive::OMPD_tile) {
@@ -2276,41 +2281,6 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter,
// Apply unrolling to it
auto cli = canonLoop.getCli();
mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli);
-
-static mlir::omp::LoopOp genTiledLoopOp(lower::AbstractConverter &converter,
- lower::SymMap &symTable,
- semantics::SemanticsContext &semaCtx,
- lower::pft::Evaluation &eval,
- mlir::Location loc,
- const ConstructQueue &queue,
- ConstructQueue::const_iterator item) {
- mlir::omp::LoopOperands loopClauseOps;
- llvm::SmallVector<const semantics::Symbol *> loopReductionSyms;
- genLoopClauses(converter, semaCtx, item->clauses, loc, loopClauseOps,
- loopReductionSyms);
-
- DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
- /*shouldCollectPreDeterminedSymbols=*/true,
- /*useDelayedPrivatization=*/true, symTable);
- dsp.processStep1(&loopClauseOps);
-
- mlir::omp::LoopNestOperands loopNestClauseOps;
- llvm::SmallVector<const semantics::Symbol *> iv;
- genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
- loopNestClauseOps, iv);
-
- EntryBlockArgs loopArgs;
- loopArgs.priv.syms = dsp.getDelayedPrivSymbols();
- loopArgs.priv.vars = loopClauseOps.privateVars;
- loopArgs.reduction.syms = loopReductionSyms;
- loopArgs.reduction.vars = loopClauseOps.reductionVars;
-
- auto loopOp =
- genWrapperOp<mlir::omp::LoopOp>(converter, loc, loopClauseOps, loopArgs);
- genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
- loopNestClauseOps, iv, {{loopOp, loopArgs}},
- llvm::omp::Directive::OMPD_loop, dsp);
- return loopOp;
}
static mlir::omp::MaskedOp
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index e04a6eae98408..dc58eecae7759 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -645,11 +645,15 @@ void collectTileSizesFromOpenMPConstruct(
return;
if (auto *ompLoop{std::get_if<parser::OpenMPLoopConstruct>(&ompCons->u)}) {
- const auto &innerOptional = std::get<
- std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
- ompLoop->t);
- if (innerOptional.has_value()) {
- const auto &innerLoopDirective = innerOptional.value().value();
+ const auto &nestedOptional =
+ std::get<std::optional<parser::NestedConstruct>>(ompLoop->t);
+ assert(nestedOptional.has_value() &&
+ "Expected a DoConstruct or OpenMPLoopConstruct");
+ const auto *innerConstruct =
+ std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>(
+ &(nestedOptional.value()));
+ if (innerConstruct) {
+ const auto &innerLoopDirective = innerConstruct->value();
const auto &innerBegin =
std::get<parser::OmpBeginLoopDirective>(innerLoopDirective.t);
const auto &innerDirective =
@@ -698,11 +702,15 @@ bool collectLoopRelatedInfo(
std::int64_t sizesLengthValue = 0l;
if (auto *ompCons{eval.getIf<parser::OpenMPConstruct>()}) {
if (auto *ompLoop{std::get_if<parser::OpenMPLoopConstruct>(&ompCons->u)}) {
- const auto &innerOptional = std::get<
- std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
- ompLoop->t);
- if (innerOptional.has_value()) {
- const auto &innerLoopDirective = innerOptional.value().value();
+ const auto &nestedOptional =
+ std::get<std::optional<parser::NestedConstruct>>(ompLoop->t);
+ assert(nestedOptional.has_value() &&
+ "Expected a DoConstruct or OpenMPLoopConstruct");
+ const auto *innerConstruct =
+ std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>(
+ &(nestedOptional.value()));
+ if (innerConstruct) {
+ const auto &innerLoopDirective = innerConstruct->value();
const auto &innerBegin =
std::get<parser::OmpBeginLoopDirective>(innerLoopDirective.t);
const auto &innerDirective =
diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp
index 79630b564e51a..4792bf2cb217c 100644
--- a/flang/lib/Semantics/canonicalize-omp.cpp
+++ b/flang/lib/Semantics/canonicalize-omp.cpp
@@ -157,13 +157,15 @@ class CanonicalizationOfOmp {
std::get<parser::OmpBeginLoopDirective>(innerOmpLoop->t)};
auto &innerDir{std::get<parser::OmpLoopDirective>(innerBeginDir.t)};
if (innerDir.v == llvm::omp::Directive::OMPD_tile) {
- auto &innerLoop = std::get<
- std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
- loops.back()->t);
+ auto &innerLoopVariant =
+ std::get<std::optional<parser::NestedConstruct>>(loops.back()->t);
+ auto &innerLoop =
+ std::get<common::Indirection<parser::OpenMPLoopConstruct>>(
+ innerLoopVariant.value());
innerLoop = std::move(*innerOmpLoop);
// Retrieveing the address so that DoConstruct or inner loop can be
// set later.
- loops.push_back(&(innerLoop.value().value()));
+ loops.push_back(&(innerLoop.value()));
nextIt = block.erase(nextIt);
}
}
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index c0a8fc81b0bb9..b8c6cb6f5f5f7 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -2060,12 +2060,18 @@ void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromInnerLoopContruct(
const parser::OpenMPLoopConstruct &x,
llvm::SmallVector<std::int64_t> &levels,
llvm::SmallVector<const parser::OmpClause *> &clauses) {
- const auto &innerOptional =
- std::get<std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
- x.t);
- if (innerOptional.has_value()) {
+
+ const auto &nestedOptional =
+ std::get<std::optional<parser::NestedConstruct>>(x.t);
+ assert(nestedOptional.has_value() &&
+ "Expected a DoConstruct or OpenMPLoopConstruct");
+ const auto *innerConstruct =
+ std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>(
+ &(nestedOptional.value()));
+
+ if (innerConstruct) {
CollectAssociatedLoopLevelsFromLoopConstruct(
- innerOptional.value().value(), levels, clauses);
+ innerConstruct->value(), levels, clauses);
}
}
@@ -2130,17 +2136,19 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel(
bool hasCollapseClause{
clause ? (clause->Id() == llvm::omp::OMPC_collapse) : false};
const parser::OpenMPLoopConstruct *innerMostLoop = &x;
-
+ const parser::NestedConstruct *innerMostNest = nullptr;
while (auto &optLoopCons{
- std::get<std::optional<parser::NestedConstruct>>(x.t)}) {
- if (const auto &innerLoop{
- std::get_if < parser::OpenMPLoopConstruct >>> (innerMostLoop->t)}) {
- innerMostLoop = &innerLoop.value().value();
+ std::get<std::optional<parser::NestedConstruct>>(innerMostLoop->t)}) {
+ innerMostNest = &(optLoopCons.value());
+ if (const auto *innerLoop{
+ std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>(
+ innerMostNest)}) {
+ innerMostLoop = &(innerLoop->value());
}
}
- if (optLoopCons.has_value()) {
- if (const auto &outer{std::get_if<parser::DoConstruct>(innerMostLoop->t)}) {
+ if (innerMostNest) {
+ if (const auto &outer{std::get_if<parser::DoConstruct>(innerMostNest)}) {
for (const parser::DoConstruct *loop{&*outer}; loop && level > 0;
--level) {
if (loop->IsDoConcurrent()) {
@@ -2176,7 +2184,7 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel(
CheckAssocLoopLevel(level, GetAssociatedClause());
} else if (const auto &loop{std::get_if<
common::Indirection<parser::OpenMPLoopConstruct>>(
- &*optLoopCons)}) {
+ innerMostNest)}) {
auto &beginDirective =
std::get<parser::OmpBeginLoopDirective>(loop->value().t);
auto &beginLoopDirective =
>From aea076714a7d3063d7c183e06a92a2dc3b3ee886 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Sat, 9 Aug 2025 12:27:35 -0400
Subject: [PATCH 24/26] Fix bugs introduced when merging.
---
flang/lib/Lower/OpenMP/OpenMP.cpp | 4 ++--
flang/lib/Semantics/canonicalize-omp.cpp | 19 +++++++++---------
flang/lib/Semantics/resolve-directives.cpp | 3 ++-
...nested-loop-transformation-construct01.f90 | 20 -------------------
flang/test/Lower/OpenMP/wsloop-tile.f90 | 2 +-
5 files changed, 15 insertions(+), 33 deletions(-)
delete mode 100644 flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 4d6ba34193a54..2c9c740aca82e 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -420,8 +420,8 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
beginClauseList =
&std::get<parser::OmpClauseList>(beginDirective.t);
- // FIXME(JAN): For now we check if there is an inner
- // OpenMPLoopConstruct, and extract the size clause from there
+ // For now we check if there is an inner OpenMPLoopConstruct, and
+ // extract the size clause from there
const auto &nestedOptional =
std::get<std::optional<parser::NestedConstruct>>(
ompConstruct.t);
diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp
index 4792bf2cb217c..c664171350d9e 100644
--- a/flang/lib/Semantics/canonicalize-omp.cpp
+++ b/flang/lib/Semantics/canonicalize-omp.cpp
@@ -143,7 +143,6 @@ class CanonicalizationOfOmp {
"If a loop construct has been fully unrolled, it cannot then be tiled"_err_en_US,
parser::ToUpperCaseLetters(dir.source.ToString()));
};
-
nextIt = it;
while (++nextIt != block.end()) {
// Ignore compiler directives.
@@ -159,14 +158,16 @@ class CanonicalizationOfOmp {
if (innerDir.v == llvm::omp::Directive::OMPD_tile) {
auto &innerLoopVariant =
std::get<std::optional<parser::NestedConstruct>>(loops.back()->t);
- auto &innerLoop =
- std::get<common::Indirection<parser::OpenMPLoopConstruct>>(
- innerLoopVariant.value());
- innerLoop = std::move(*innerOmpLoop);
- // Retrieveing the address so that DoConstruct or inner loop can be
- // set later.
- loops.push_back(&(innerLoop.value()));
- nextIt = block.erase(nextIt);
+ if (innerLoopVariant.has_value()) {
+ auto *innerLoop =
+ std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>(
+ &(innerLoopVariant.value()));
+ *innerLoop = std::move(*innerOmpLoop);
+ // Retrieveing the address so that DoConstruct or inner loop can be
+ // set later.
+ loops.push_back(&(innerLoop->value()));
+ nextIt = block.erase(nextIt);
+ }
}
}
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index b8c6cb6f5f5f7..80393a4eec2a0 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -2144,7 +2144,8 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel(
std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>(
innerMostNest)}) {
innerMostLoop = &(innerLoop->value());
- }
+ } else
+ break;
}
if (innerMostNest) {
diff --git a/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 b/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90
deleted file mode 100644
index 17eba93a7405d..0000000000000
--- a/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90
+++ /dev/null
@@ -1,20 +0,0 @@
-! Test to ensure TODO message is emitted for tile OpenMP 5.1 Directives when they are nested.
-
-!RUN: not %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s 2>&1 | FileCheck %s
-
-subroutine loop_transformation_construct
- implicit none
- integer :: I = 10
- integer :: x
- integer :: y(I)
-
- !$omp do
- !$omp tile
- do i = 1, I
- y(i) = y(i) * 5
- end do
- !$omp end tile
- !$omp end do
-end subroutine
-
-!CHECK: not yet implemented: Unhandled loop directive (tile)
diff --git a/flang/test/Lower/OpenMP/wsloop-tile.f90 b/flang/test/Lower/OpenMP/wsloop-tile.f90
index c9bf18e3b278d..4c412b357f52e 100644
--- a/flang/test/Lower/OpenMP/wsloop-tile.f90
+++ b/flang/test/Lower/OpenMP/wsloop-tile.f90
@@ -2,7 +2,7 @@
! RUN: bbc -fopenmp -fopenmp-version=51 -emit-hlfir %s -o - | FileCheck %s
-!CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "wsloop_tile"} {
+!CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "WSLOOP_TILE"} {
program wsloop_tile
integer :: i, j, k
integer :: a, b, c
>From 5dca0a3dd3d4f744fc41ea277e928e23538913f9 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Mon, 11 Aug 2025 07:23:15 -0400
Subject: [PATCH 25/26] Move include
---
flang/lib/Lower/OpenMP/Utils.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index dc58eecae7759..07f562fa6a4b1 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -13,8 +13,8 @@
#include "Utils.h"
#include "ClauseFinder.h"
-#include "flang/Lower/OpenMP/Clauses.h"
#include "flang/Evaluate/fold.h"
+#include "flang/Lower/OpenMP/Clauses.h"
#include <flang/Lower/AbstractConverter.h>
#include <flang/Lower/ConvertType.h>
#include <flang/Lower/DirectivesCommon.h>
>From 5c0f849e9fbf01958a3c343ef12229034cd8d9f5 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Tue, 19 Aug 2025 10:55:05 -0400
Subject: [PATCH 26/26] Remove unused code. Currently the canonicalize-omp can
only handle a single nested loop construct, which is what we prefer.
---
flang/lib/Lower/OpenMP/OpenMP.cpp | 1 -
flang/lib/Lower/OpenMP/Utils.cpp | 22 ++----
flang/lib/Semantics/canonicalize-omp.cpp | 68 ++++---------------
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 2 -
4 files changed, 20 insertions(+), 73 deletions(-)
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 2c9c740aca82e..30e537c5b0dce 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -937,7 +937,6 @@ static void genLoopVars(
storeOp =
createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol);
}
-
firOpBuilder.setInsertionPointAfter(storeOp);
}
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 07f562fa6a4b1..6c9763e5a37ab 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -59,21 +59,14 @@ namespace lower {
namespace omp {
int64_t getCollapseValue(const List<Clause> &clauses) {
- int64_t collapseValue = 1;
- int64_t numTileSizes = 0;
- for (auto &clause : clauses) {
- if (clause.id == llvm::omp::Clause::OMPC_collapse) {
- const auto &collapse = std::get<clause::Collapse>(clause.u);
- collapseValue = evaluate::ToInt64(collapse.v).value();
- } else if (clause.id == llvm::omp::Clause::OMPC_sizes) {
- const auto &sizes = std::get<clause::Sizes>(clause.u);
- numTileSizes = sizes.v.size();
- }
+ auto iter = llvm::find_if(clauses, [](const Clause &clause) {
+ return clause.id == llvm::omp::Clause::OMPC_collapse;
+ });
+ if (iter != clauses.end()) {
+ const auto &collapse = std::get<clause::Collapse>(iter->u);
+ return evaluate::ToInt64(collapse.v).value();
}
-
- collapseValue = collapseValue - numTileSizes;
- int64_t result = collapseValue > numTileSizes ? collapseValue : numTileSizes;
- return result;
+ return 1;
}
void genObjectList(const ObjectList &objects,
@@ -681,7 +674,6 @@ bool collectLoopRelatedInfo(
lower::pft::Evaluation &eval, const omp::List<omp::Clause> &clauses,
mlir::omp::LoopRelatedClauseOps &result,
llvm::SmallVectorImpl<const semantics::Symbol *> &iv) {
-
bool found = false;
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp
index c664171350d9e..9722eca19447d 100644
--- a/flang/lib/Semantics/canonicalize-omp.cpp
+++ b/flang/lib/Semantics/canonicalize-omp.cpp
@@ -10,6 +10,7 @@
#include "flang/Parser/parse-tree-visitor.h"
#include "flang/Parser/parse-tree.h"
#include "flang/Semantics/semantics.h"
+
// After Loop Canonicalization, rewrite OpenMP parse tree to make OpenMP
// Constructs more structured which provide explicit scopes for later
// structural checks and semantic analysis.
@@ -116,19 +117,15 @@ class CanonicalizationOfOmp {
// in the same iteration
//
// Original:
- // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t->
- // OmpBeginLoopDirective t-> OmpLoopDirective
- // [ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct u->
- // OmpBeginLoopDirective t-> OmpLoopDirective t-> Tile v-> OMP_tile]
+ // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+ // OmpBeginLoopDirective
// ExecutableConstruct -> DoConstruct
- // [ExecutableConstruct -> OmpEndLoopDirective] (note: tile)
// ExecutableConstruct -> OmpEndLoopDirective (if available)
//
// After rewriting:
- // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t->
- // [OpenMPLoopConstruct t -> OmpBeginLoopDirective -> OmpLoopDirective
- // OmpEndLoopDirective] (note: tile)
- // OmpBeginLoopDirective t -> OmpLoopDirective -> DoConstruct
+ // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+ // OmpBeginLoopDirective
+ // DoConstruct
// OmpEndLoopDirective (if available)
parser::Block::iterator nextIt;
auto &beginDir{std::get<parser::OmpBeginLoopDirective>(x.t)};
@@ -143,66 +140,27 @@ class CanonicalizationOfOmp {
"If a loop construct has been fully unrolled, it cannot then be tiled"_err_en_US,
parser::ToUpperCaseLetters(dir.source.ToString()));
};
+
nextIt = it;
while (++nextIt != block.end()) {
// Ignore compiler directives.
if (GetConstructIf<parser::CompilerDirective>(*nextIt))
continue;
- // Keep track of the loops to handle the end loop directives
- llvm::SmallVector<parser::OpenMPLoopConstruct *> loops;
- loops.push_back(&x);
- if (auto *innerOmpLoop{GetOmpIf<parser::OpenMPLoopConstruct>(*nextIt)}) {
- auto &innerBeginDir{
- std::get<parser::OmpBeginLoopDirective>(innerOmpLoop->t)};
- auto &innerDir{std::get<parser::OmpLoopDirective>(innerBeginDir.t)};
- if (innerDir.v == llvm::omp::Directive::OMPD_tile) {
- auto &innerLoopVariant =
- std::get<std::optional<parser::NestedConstruct>>(loops.back()->t);
- if (innerLoopVariant.has_value()) {
- auto *innerLoop =
- std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>(
- &(innerLoopVariant.value()));
- *innerLoop = std::move(*innerOmpLoop);
- // Retrieveing the address so that DoConstruct or inner loop can be
- // set later.
- loops.push_back(&(innerLoop->value()));
- nextIt = block.erase(nextIt);
- }
- }
- }
if (auto *doCons{GetConstructIf<parser::DoConstruct>(*nextIt)}) {
if (doCons->GetLoopControl()) {
// move DoConstruct
std::get<std::optional<std::variant<parser::DoConstruct,
- common::Indirection<parser::OpenMPLoopConstruct>>>>(
- loops.back()->t) = std::move(*doCons);
+ common::Indirection<parser::OpenMPLoopConstruct>>>>(x.t) =
+ std::move(*doCons);
nextIt = block.erase(nextIt);
// try to match OmpEndLoopDirective
- while (nextIt != block.end() && !loops.empty()) {
+ if (nextIt != block.end()) {
if (auto *endDir{
GetConstructIf<parser::OmpEndLoopDirective>(*nextIt)}) {
- auto &endOmpDirective{
- std::get<parser::OmpLoopDirective>(endDir->t)};
- auto &loopBegin{
- std::get<parser::OmpBeginLoopDirective>(loops.back()->t)};
- auto &loopDir{std::get<parser::OmpLoopDirective>(loopBegin.t)};
-
- // If the directive is a tile we try to match the corresponding
- // end tile if it exsists. If it is not a tile directive we
- // always assign the end loop directive and fall back on the
- // existing directive structure checks.
- if (loopDir.v != llvm::omp::Directive::OMPD_tile ||
- loopDir.v == endOmpDirective.v) {
- std::get<std::optional<parser::OmpEndLoopDirective>>(
- loops.back()->t) = std::move(*endDir);
- nextIt = block.erase(nextIt);
- }
-
- loops.pop_back();
- } else {
- // If there is a mismatch bail out.
- break;
+ std::get<std::optional<parser::OmpEndLoopDirective>>(x.t) =
+ std::move(*endDir);
+ nextIt = block.erase(nextIt);
}
}
} else {
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 40864d5e6c99e..9429091605320 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3037,8 +3037,6 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
llvm::OpenMPIRBuilder::InsertPointTy afterIP =
loopInfos.front()->getAfterIP();
- // Initialize the new loop info to the current one, in case there
- // are no loop transformations done.
llvm::CanonicalLoopInfo *NewTopLoopInfo = nullptr;
// Do tiling
More information about the flang-commits
mailing list