From flang-commits at lists.llvm.org Tue Jul 1 00:39:19 2025 From: flang-commits at lists.llvm.org (via flang-commits) Date: Tue, 01 Jul 2025 00:39:19 -0700 (PDT) Subject: [flang-commits] [flang] 65cb0ea - [Flang][OpenMP] Add Semantics support for Nested OpenMPLoopConstructs (#145917) Message-ID: <686390a7.170a0220.968f2.82bb@mx.google.com> Author: Jack Styles Date: 2025-07-01T08:39:15+01:00 New Revision: 65cb0eae58d2b668869f3e8f10cb79eb2b8c55ac URL: https://github.com/llvm/llvm-project/commit/65cb0eae58d2b668869f3e8f10cb79eb2b8c55ac DIFF: https://github.com/llvm/llvm-project/commit/65cb0eae58d2b668869f3e8f10cb79eb2b8c55ac.diff LOG: [Flang][OpenMP] Add Semantics support for Nested OpenMPLoopConstructs (#145917) In OpenMP Version 5.1, the tile and unroll directives were added. When using these directives, it is possible to nest them within other OpenMP Loop Constructs. This patch enables the semantics to allow for this behaviour on these specific directives. Any nested loops will be stored within the initial Loop Construct until reaching the DoConstruct itself. Relevant tests have been added, and previous behaviour has been retained with no changes. 
See also, #110008 Added: flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 flang/test/Lower/OpenMP/nested-loop-transformation-construct02.f90 flang/test/Parser/OpenMP/loop-transformation-construct01.f90 flang/test/Parser/OpenMP/loop-transformation-construct02.f90 flang/test/Semantics/OpenMP/loop-transformation-construct01.f90 Modified: flang/include/flang/Parser/parse-tree.h flang/lib/Lower/OpenMP/OpenMP.cpp flang/lib/Parser/unparse.cpp flang/lib/Semantics/canonicalize-omp.cpp flang/lib/Semantics/check-omp-structure.cpp flang/lib/Semantics/resolve-directives.cpp Removed: ################################################################################ diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 61f97b855b0e5..7e752eeb4dfe4 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -267,6 +267,7 @@ struct AccEndCombinedDirective; struct OpenACCDeclarativeConstruct; struct OpenACCRoutineConstruct; struct OpenMPConstruct; +struct OpenMPLoopConstruct; struct OpenMPDeclarativeConstruct; struct OmpEndLoopDirective; struct OmpMemoryOrderClause; @@ -5021,11 +5022,13 @@ struct OpenMPBlockConstruct { }; // OpenMP directives enclosing do loop +using NestedConstruct = + std::variant>; struct OpenMPLoopConstruct { TUPLE_CLASS_BOILERPLATE(OpenMPLoopConstruct); OpenMPLoopConstruct(OmpBeginLoopDirective &&a) : t({std::move(a), std::nullopt, std::nullopt}) {} - std::tuple, + std::tuple, std::optional> t; }; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 96daadfaf1b3b..0a56e888ac44b 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3769,6 +3769,16 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, mlir::Location currentLocation = converter.genLocation(beginLoopDirective.source); + auto &optLoopCons = + std::get>(loopConstruct.t); + if 
(optLoopCons.has_value()) { + if (auto *ompNestedLoopCons{ + std::get_if>( + &*optLoopCons)}) { + genOMP(converter, symTable, semaCtx, eval, ompNestedLoopCons->value()); + } + } + llvm::omp::Directive directive = std::get(beginLoopDirective.t).v; const parser::CharBlock &source = diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index ed0f227fd5b98..903287515e559 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2926,7 +2926,8 @@ class UnparseVisitor { Walk(std::get(x.t)); Put("\n"); EndOpenMP(); - Walk(std::get>(x.t)); + Walk(std::get>>>(x.t)); Walk(std::get>(x.t)); } void Unparse(const BasedPointer &x) { diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 5164f1dc6faab..1edcb376596b0 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -8,6 +8,7 @@ #include "canonicalize-omp.h" #include "flang/Parser/parse-tree-visitor.h" +#include "flang/Parser/parse-tree.h" // After Loop Canonicalization, rewrite OpenMP parse tree to make OpenMP // Constructs more structured which provide explicit scopes for later @@ -125,6 +126,16 @@ class CanonicalizationOfOmp { parser::Block::iterator nextIt; auto &beginDir{std::get(x.t)}; auto &dir{std::get(beginDir.t)}; + auto missingDoConstruct = [](auto &dir, auto &messages) { + messages.Say(dir.source, + "A DO loop must follow the %s directive"_err_en_US, + parser::ToUpperCaseLetters(dir.source.ToString())); + }; + auto tileUnrollError = [](auto &dir, auto &messages) { + messages.Say(dir.source, + "If a loop construct has been fully unrolled, it cannot then be tiled"_err_en_US, + parser::ToUpperCaseLetters(dir.source.ToString())); + }; nextIt = it; while (++nextIt != block.end()) { @@ -135,31 +146,95 @@ class CanonicalizationOfOmp { if (auto *doCons{GetConstructIf(*nextIt)}) { if (doCons->GetLoopControl()) { // move DoConstruct - std::get>(x.t) = + std::get>>>(x.t) = 
std::move(*doCons); nextIt = block.erase(nextIt); // try to match OmpEndLoopDirective - if (nextIt != block.end()) { - if (auto *endDir{ - GetConstructIf(*nextIt)}) { - std::get>(x.t) = - std::move(*endDir); - block.erase(nextIt); - } + if (auto *endDir{ + GetConstructIf(*nextIt)}) { + std::get>(x.t) = + std::move(*endDir); + nextIt = block.erase(nextIt); } } else { messages_.Say(dir.source, "DO loop after the %s directive must have loop control"_err_en_US, parser::ToUpperCaseLetters(dir.source.ToString())); } + } else if (auto *ompLoopCons{ + GetOmpIf(*nextIt)}) { + // We should allow UNROLL and TILE constructs to be inserted between an + // OpenMP Loop Construct and the DO loop itself + auto &nestedBeginDirective = + std::get(ompLoopCons->t); + auto &nestedBeginLoopDirective = + std::get(nestedBeginDirective.t); + if ((nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_unroll || + nestedBeginLoopDirective.v == + llvm::omp::Directive::OMPD_tile) && + !(nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_unroll && + dir.v == llvm::omp::Directive::OMPD_tile)) { + // iterate through the remaining block items to find the end directive + // for the unroll/tile directive. 
+ parser::Block::iterator endIt; + endIt = nextIt; + while (endIt != block.end()) { + if (auto *endDir{ + GetConstructIf(*endIt)}) { + auto &endLoopDirective = + std::get(endDir->t); + if (endLoopDirective.v == dir.v) { + std::get>(x.t) = + std::move(*endDir); + endIt = block.erase(endIt); + continue; + } + } + ++endIt; + } + RewriteOpenMPLoopConstruct(*ompLoopCons, block, nextIt); + auto &ompLoop = std::get>(x.t); + ompLoop = + std::optional{parser::NestedConstruct{ + common::Indirection{std::move(*ompLoopCons)}}}; + nextIt = block.erase(nextIt); + } else if (nestedBeginLoopDirective.v == + llvm::omp::Directive::OMPD_unroll && + dir.v == llvm::omp::Directive::OMPD_tile) { + // if a loop has been unrolled, the user can not then tile that loop + // as it has been unrolled + parser::OmpClauseList &unrollClauseList{ + std::get(nestedBeginDirective.t)}; + if (unrollClauseList.v.empty()) { + // if the clause list is empty for an unroll construct, we assume + // the loop is being fully unrolled + tileUnrollError(dir, messages_); + } else { + // parse the clauses for the unroll directive to find the full + // clause + for (auto clause{unrollClauseList.v.begin()}; + clause != unrollClauseList.v.end(); ++clause) { + if (clause->Id() == llvm::omp::OMPC_full) { + tileUnrollError(dir, messages_); + } + } + } + } else { + messages_.Say(nestedBeginLoopDirective.source, + "Only Loop Transformation Constructs or Loop Nests can be nested within Loop Constructs"_err_en_US, + parser::ToUpperCaseLetters( + nestedBeginLoopDirective.source.ToString())); + } } else { - messages_.Say(dir.source, - "A DO loop must follow the %s directive"_err_en_US, - parser::ToUpperCaseLetters(dir.source.ToString())); + missingDoConstruct(dir, messages_); } // If we get here, we either found a loop, or issued an error message. 
return; } + if (nextIt == block.end()) { + missingDoConstruct(dir, messages_); + } } void RewriteOmpAllocations(parser::ExecutionPart &body) { diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 3abb5a304b00c..e080bce3cac3a 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -762,10 +762,13 @@ void OmpStructureChecker::Enter(const parser::OpenMPLoopConstruct &x) { } SetLoopInfo(x); - if (const auto &doConstruct{ - std::get>(x.t)}) { - const auto &doBlock{std::get(doConstruct->t)}; - CheckNoBranching(doBlock, beginDir.v, beginDir.source); + auto &optLoopCons = std::get>(x.t); + if (optLoopCons.has_value()) { + if (const auto &doConstruct{ + std::get_if(&*optLoopCons)}) { + const auto &doBlock{std::get(doConstruct->t)}; + CheckNoBranching(doBlock, beginDir.v, beginDir.source); + } } CheckLoopItrVariableIsInt(x); CheckAssociatedLoopConstraints(x); @@ -786,12 +789,15 @@ const parser::Name OmpStructureChecker::GetLoopIndex( return std::get(x->GetLoopControl()->u).name.thing; } void OmpStructureChecker::SetLoopInfo(const parser::OpenMPLoopConstruct &x) { - if (const auto &loopConstruct{ - std::get>(x.t)}) { - const parser::DoConstruct *loop{&*loopConstruct}; - if (loop && loop->IsDoNormal()) { - const parser::Name &itrVal{GetLoopIndex(loop)}; - SetLoopIv(itrVal.symbol); + auto &optLoopCons = std::get>(x.t); + if (optLoopCons.has_value()) { + if (const auto &loopConstruct{ + std::get_if(&*optLoopCons)}) { + const parser::DoConstruct *loop{&*loopConstruct}; + if (loop && loop->IsDoNormal()) { + const parser::Name &itrVal{GetLoopIndex(loop)}; + SetLoopIv(itrVal.symbol); + } } } } @@ -857,27 +863,30 @@ void OmpStructureChecker::CheckIteratorModifier(const parser::OmpIterator &x) { void OmpStructureChecker::CheckLoopItrVariableIsInt( const parser::OpenMPLoopConstruct &x) { - if (const auto &loopConstruct{ - std::get>(x.t)}) { + auto &optLoopCons = std::get>(x.t); 
+ if (optLoopCons.has_value()) { + if (const auto &loopConstruct{ + std::get_if(&*optLoopCons)}) { - for (const parser::DoConstruct *loop{&*loopConstruct}; loop;) { - if (loop->IsDoNormal()) { - const parser::Name &itrVal{GetLoopIndex(loop)}; - if (itrVal.symbol) { - const auto *type{itrVal.symbol->GetType()}; - if (!type->IsNumeric(TypeCategory::Integer)) { - context_.Say(itrVal.source, - "The DO loop iteration" - " variable must be of the type integer."_err_en_US, - itrVal.ToString()); + for (const parser::DoConstruct *loop{&*loopConstruct}; loop;) { + if (loop->IsDoNormal()) { + const parser::Name &itrVal{GetLoopIndex(loop)}; + if (itrVal.symbol) { + const auto *type{itrVal.symbol->GetType()}; + if (!type->IsNumeric(TypeCategory::Integer)) { + context_.Say(itrVal.source, + "The DO loop iteration" + " variable must be of the type integer."_err_en_US, + itrVal.ToString()); + } } } + // Get the next DoConstruct if block is not empty. + const auto &block{std::get(loop->t)}; + const auto it{block.begin()}; + loop = it != block.end() ? parser::Unwrap(*it) + : nullptr; } - // Get the next DoConstruct if block is not empty. - const auto &block{std::get(loop->t)}; - const auto it{block.begin()}; - loop = it != block.end() ? parser::Unwrap(*it) - : nullptr; } } } @@ -1077,25 +1086,28 @@ void OmpStructureChecker::CheckDistLinear( // Match the loop index variables with the collected symbols from linear // clauses. 
- if (const auto &loopConstruct{ - std::get>(x.t)}) { - for (const parser::DoConstruct *loop{&*loopConstruct}; loop;) { - if (loop->IsDoNormal()) { - const parser::Name &itrVal{GetLoopIndex(loop)}; - if (itrVal.symbol) { - // Remove the symbol from the collected set - indexVars.erase(&itrVal.symbol->GetUltimate()); - } - collapseVal--; - if (collapseVal == 0) { - break; + auto &optLoopCons = std::get>(x.t); + if (optLoopCons.has_value()) { + if (const auto &loopConstruct{ + std::get_if(&*optLoopCons)}) { + for (const parser::DoConstruct *loop{&*loopConstruct}; loop;) { + if (loop->IsDoNormal()) { + const parser::Name &itrVal{GetLoopIndex(loop)}; + if (itrVal.symbol) { + // Remove the symbol from the collected set + indexVars.erase(&itrVal.symbol->GetUltimate()); + } + collapseVal--; + if (collapseVal == 0) { + break; + } } + // Get the next DoConstruct if block is not empty. + const auto &block{std::get(loop->t)}; + const auto it{block.begin()}; + loop = it != block.end() ? parser::Unwrap(*it) + : nullptr; } - // Get the next DoConstruct if block is not empty. - const auto &block{std::get(loop->t)}; - const auto it{block.begin()}; - loop = it != block.end() ? 
parser::Unwrap(*it) - : nullptr; } } diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 885c02e6ec74b..8d741734601a7 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -1796,10 +1796,13 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { SetContextAssociatedLoopLevel(GetAssociatedLoopLevelFromClauses(clauseList)); if (beginDir.v == llvm::omp::Directive::OMPD_do) { - if (const auto &doConstruct{ - std::get>(x.t)}) { - if (doConstruct.value().IsDoWhile()) { - return true; + auto &optLoopCons = std::get>(x.t); + if (optLoopCons.has_value()) { + if (const auto &doConstruct{ + std::get_if(&*optLoopCons)}) { + if (doConstruct->IsDoWhile()) { + return true; + } } } } @@ -1962,48 +1965,69 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( bool hasCollapseClause{ clause ? (clause->Id() == llvm::omp::OMPC_collapse) : false}; - const auto &outer{std::get>(x.t)}; - if (outer.has_value()) { - for (const parser::DoConstruct *loop{&*outer}; loop && level > 0; --level) { - if (loop->IsDoConcurrent()) { - // DO CONCURRENT is explicitly allowed for the LOOP construct so long as - // there isn't a COLLAPSE clause - if (isLoopConstruct) { - if (hasCollapseClause) { - // hasCollapseClause implies clause != nullptr - context_.Say(clause->source, - "DO CONCURRENT loops cannot be used with the COLLAPSE clause."_err_en_US); + auto &optLoopCons = std::get>(x.t); + if (optLoopCons.has_value()) { + if (const auto &outer{std::get_if(&*optLoopCons)}) { + for (const parser::DoConstruct *loop{&*outer}; loop && level > 0; + --level) { + if (loop->IsDoConcurrent()) { + // DO CONCURRENT is explicitly allowed for the LOOP construct so long + // as there isn't a COLLAPSE clause + if (isLoopConstruct) { + if (hasCollapseClause) { + // hasCollapseClause implies clause != nullptr + context_.Say(clause->source, + "DO CONCURRENT loops cannot be used 
with the COLLAPSE clause."_err_en_US); + } + } else { + auto &stmt = + std::get>(loop->t); + context_.Say(stmt.source, + "DO CONCURRENT loops cannot form part of a loop nest."_err_en_US); } - } else { - auto &stmt = - std::get>(loop->t); - context_.Say(stmt.source, - "DO CONCURRENT loops cannot form part of a loop nest."_err_en_US); - } - } - // go through all the nested do-loops and resolve index variables - const parser::Name *iv{GetLoopIndex(*loop)}; - if (iv) { - if (auto *symbol{ResolveOmp(*iv, ivDSA, currScope())}) { - SetSymbolDSA(*symbol, {Symbol::Flag::OmpPreDetermined, ivDSA}); - iv->symbol = symbol; // adjust the symbol within region - AddToContextObjectWithDSA(*symbol, ivDSA); } + // go through all the nested do-loops and resolve index variables + const parser::Name *iv{GetLoopIndex(*loop)}; + if (iv) { + if (auto *symbol{ResolveOmp(*iv, ivDSA, currScope())}) { + SetSymbolDSA(*symbol, {Symbol::Flag::OmpPreDetermined, ivDSA}); + iv->symbol = symbol; // adjust the symbol within region + AddToContextObjectWithDSA(*symbol, ivDSA); + } - const auto &block{std::get(loop->t)}; - const auto it{block.begin()}; - loop = it != block.end() ? GetDoConstructIf(*it) : nullptr; + const auto &block{std::get(loop->t)}; + const auto it{block.begin()}; + loop = it != block.end() ? 
GetDoConstructIf(*it) : nullptr; + } } + CheckAssocLoopLevel(level, GetAssociatedClause()); + } else if (const auto &loop{std::get_if< + common::Indirection>( + &*optLoopCons)}) { + auto &beginDirective = + std::get(loop->value().t); + auto &beginLoopDirective = + std::get(beginDirective.t); + if (beginLoopDirective.v != llvm::omp::Directive::OMPD_unroll && + beginLoopDirective.v != llvm::omp::Directive::OMPD_tile) { + context_.Say(GetContext().directiveSource, + "Only UNROLL or TILE constructs are allowed between an OpenMP Loop Construct and a DO construct"_err_en_US, + parser::ToUpperCaseLetters(llvm::omp::getOpenMPDirectiveName( + GetContext().directive, version) + .str())); + } else { + PrivatizeAssociatedLoopIndexAndCheckLoopLevel(loop->value()); + } + } else { + context_.Say(GetContext().directiveSource, + "A DO loop must follow the %s directive"_err_en_US, + parser::ToUpperCaseLetters( + llvm::omp::getOpenMPDirectiveName(GetContext().directive, version) + .str())); } - CheckAssocLoopLevel(level, GetAssociatedClause()); - } else { - context_.Say(GetContext().directiveSource, - "A DO loop must follow the %s directive"_err_en_US, - parser::ToUpperCaseLetters( - llvm::omp::getOpenMPDirectiveName(GetContext().directive, version) - .str())); } } + void OmpAttributeVisitor::CheckAssocLoopLevel( std::int64_t level, const parser::OmpClause *clause) { if (clause && level != 0) { diff --git a/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 b/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 new file mode 100644 index 0000000000000..a76e7e52100db --- /dev/null +++ b/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 @@ -0,0 +1,20 @@ +! Test to ensure TODO message is emitted for tile OpenMP 5.1 Directives when they are nested. 
+ +!RUN: not %flang -fopenmp -fopenmp-version=51 %s 2>&1 | FileCheck %s + +subroutine loop_transformation_construct + implicit none + integer :: I = 10 + integer :: x + integer :: y(I) + + !$omp do + !$omp tile + do i = 1, I + y(i) = y(i) * 5 + end do + !$omp end tile + !$omp end do +end subroutine + +!CHECK: not yet implemented: Unhandled loop directive (tile) diff --git a/flang/test/Lower/OpenMP/nested-loop-transformation-construct02.f90 b/flang/test/Lower/OpenMP/nested-loop-transformation-construct02.f90 new file mode 100644 index 0000000000000..33b7c5a917619 --- /dev/null +++ b/flang/test/Lower/OpenMP/nested-loop-transformation-construct02.f90 @@ -0,0 +1,20 @@ +! Test to ensure TODO message is emitted for unroll OpenMP 5.1 Directives when they are nested. + +!RUN: not %flang -fopenmp -fopenmp-version=51 %s 2>&1 | FileCheck %s + +program loop_transformation_construct + implicit none + integer, parameter :: I = 10 + integer :: x + integer :: y(I) + + !$omp do + !$omp unroll + do x = 1, I + y(x) = y(x) * 5 + end do + !$omp end unroll + !$omp end do +end program loop_transformation_construct + +!CHECK: not yet implemented: Unhandled loop directive (unroll) diff --git a/flang/test/Parser/OpenMP/loop-transformation-construct01.f90 b/flang/test/Parser/OpenMP/loop-transformation-construct01.f90 new file mode 100644 index 0000000000000..baffc2f6e2f1e --- /dev/null +++ b/flang/test/Parser/OpenMP/loop-transformation-construct01.f90 @@ -0,0 +1,74 @@ +! Test the Parse Tree to ensure the OpenMP Loop Transformation Constructs nest correctly with 1 nested loop. + +! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck %s --check-prefix=CHECK-PARSE +! 
RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck %s --check-prefix=CHECK-UNPARSE + +subroutine loop_transformation_construct + implicit none + integer :: I = 10 + integer :: x + integer :: y(I) + + !$omp do + !$omp unroll + do i = 1, I + y(i) = y(i) * 5 + end do + !$omp end unroll + !$omp end do +end subroutine + +!CHECK-PARSE: | ExecutionPart -> Block +!CHECK-PARSE-NEXT: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct +!CHECK-PARSE-NEXT: | | | OmpBeginLoopDirective +!CHECK-PARSE-NEXT: | | | | OmpLoopDirective -> llvm::omp::Directive = do +!CHECK-PARSE-NEXT: | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | OpenMPLoopConstruct +!CHECK-PARSE-NEXT: | | | | OmpBeginLoopDirective +!CHECK-PARSE-NEXT: | | | | | OmpLoopDirective -> llvm::omp::Directive = unroll +!CHECK-PARSE-NEXT: | | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | | DoConstruct +!CHECK-PARSE-NEXT: | | | | | NonLabelDoStmt +!CHECK-PARSE-NEXT: | | | | | | LoopControl -> LoopBounds +!CHECK-PARSE-NEXT: | | | | | | | Scalar -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | | | Scalar -> Expr = '1_4' +!CHECK-PARSE-NEXT: | | | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK-PARSE-NEXT: | | | | | | | Scalar -> Expr = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | Designator -> DataRef -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | Block +!CHECK-PARSE-NEXT: | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt = 'y(int(i,kind=8))=5_4*y(int(i,kind=8))' +!CHECK-PARSE-NEXT: | | | | | | | Variable = 'y(int(i,kind=8))' +!CHECK-PARSE-NEXT: | | | | | | | | Designator -> DataRef -> ArrayElement +!CHECK-PARSE-NEXT: | | | | | | | | | DataRef -> Name = 'y' +!CHECK-PARSE-NEXT: | | | | | | | | | SectionSubscript -> Integer -> Expr = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | | Designator -> DataRef -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | | | Expr = '5_4*y(int(i,kind=8))' +!CHECK-PARSE-NEXT: | | | | | | | | Multiply 
+!CHECK-PARSE-NEXT: | | | | | | | | | Expr = 'y(int(i,kind=8))' +!CHECK-PARSE-NEXT: | | | | | | | | | | Designator -> DataRef -> ArrayElement +!CHECK-PARSE-NEXT: | | | | | | | | | | | DataRef -> Name = 'y' +!CHECK-PARSE-NEXT: | | | | | | | | | | | SectionSubscript -> Integer -> Expr = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | | | | Designator -> DataRef -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | Expr = '5_4' +!CHECK-PARSE-NEXT: | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '5' +!CHECK-PARSE-NEXT: | | | | | EndDoStmt -> +!CHECK-PARSE-NEXT: | | | | OmpEndLoopDirective +!CHECK-PARSE-NEXT: | | | | | OmpLoopDirective -> llvm::omp::Directive = unroll +!CHECK-PARSE-NEXT: | | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | OmpEndLoopDirective +!CHECK-PARSE-NEXT: | | | | OmpLoopDirective -> llvm::omp::Directive = do +!CHECK-PARSE-NEXT: | | | | OmpClauseList -> + +!CHECK-UNPARSE: SUBROUTINE loop_transformation_construct +!CHECK-UNPARSE-NEXT: IMPLICIT NONE +!CHECK-UNPARSE-NEXT: INTEGER :: i = 10_4 +!CHECK-UNPARSE-NEXT: INTEGER x +!CHECK-UNPARSE-NEXT: INTEGER y(i) +!CHECK-UNPARSE-NEXT: !$OMP DO +!CHECK-UNPARSE-NEXT: !$OMP UNROLL +!CHECK-UNPARSE-NEXT: DO i=1_4,i +!CHECK-UNPARSE-NEXT: y(int(i,kind=8))=5_4*y(int(i,kind=8)) +!CHECK-UNPARSE-NEXT: END DO +!CHECK-UNPARSE-NEXT: !$OMP END UNROLL +!CHECK-UNPARSE-NEXT: !$OMP END DO +!CHECK-UNPARSE-NEXT: END SUBROUTINE diff --git a/flang/test/Parser/OpenMP/loop-transformation-construct02.f90 b/flang/test/Parser/OpenMP/loop-transformation-construct02.f90 new file mode 100644 index 0000000000000..b50e7183841cc --- /dev/null +++ b/flang/test/Parser/OpenMP/loop-transformation-construct02.f90 @@ -0,0 +1,85 @@ +! Test the Parse Tree to ensure the OpenMP Loop Transformation Constructs nest correctly with multiple nested loops. + +! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck %s --check-prefix=CHECK-PARSE +! 
RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck %s --check-prefix=CHECK-UNPARSE + +subroutine loop_transformation_construct + implicit none + integer :: I = 10 + integer :: x + integer :: y(I) + + !$omp do + !$omp unroll + !$omp tile + do i = 1, I + y(i) = y(i) * 5 + end do + !$omp end tile + !$omp end unroll + !$omp end do +end subroutine + +!CHECK-PARSE: | ExecutionPart -> Block +!CHECK-PARSE-NEXT: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct +!CHECK-PARSE-NEXT: | | | OmpBeginLoopDirective +!CHECK-PARSE-NEXT: | | | | OmpLoopDirective -> llvm::omp::Directive = do +!CHECK-PARSE-NEXT: | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | OpenMPLoopConstruct +!CHECK-PARSE-NEXT: | | | | OmpBeginLoopDirective +!CHECK-PARSE-NEXT: | | | | | OmpLoopDirective -> llvm::omp::Directive = unroll +!CHECK-PARSE-NEXT: | | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | | OpenMPLoopConstruct +!CHECK-PARSE-NEXT: | | | | | OmpBeginLoopDirective +!CHECK-PARSE-NEXT: | | | | | | OmpLoopDirective -> llvm::omp::Directive = tile +!CHECK-PARSE-NEXT: | | | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | | | DoConstruct +!CHECK-PARSE-NEXT: | | | | | | NonLabelDoStmt +!CHECK-PARSE-NEXT: | | | | | | | LoopControl -> LoopBounds +!CHECK-PARSE-NEXT: | | | | | | | | Scalar -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | Scalar -> Expr = '1_4' +!CHECK-PARSE-NEXT: | | | | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK-PARSE-NEXT: | | | | | | | | Scalar -> Expr = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | Designator -> DataRef -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | | Block +!CHECK-PARSE-NEXT: | | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt = 'y(int(i,kind=8))=5_4*y(int(i,kind=8))' +!CHECK-PARSE-NEXT: | | | | | | | | Variable = 'y(int(i,kind=8))' +!CHECK-PARSE-NEXT: | | | | | | | | | Designator -> DataRef -> ArrayElement +!CHECK-PARSE-NEXT: | | | | | | | | | | 
DataRef -> Name = 'y' +!CHECK-PARSE-NEXT: | | | | | | | | | | SectionSubscript -> Integer -> Expr = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | | | Designator -> DataRef -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | Expr = '5_4*y(int(i,kind=8))' +!CHECK-PARSE-NEXT: | | | | | | | | | Multiply +!CHECK-PARSE-NEXT: | | | | | | | | | | Expr = 'y(int(i,kind=8))' +!CHECK-PARSE-NEXT: | | | | | | | | | | | Designator -> DataRef -> ArrayElement +!CHECK-PARSE-NEXT: | | | | | | | | | | | | DataRef -> Name = 'y' +!CHECK-PARSE-NEXT: | | | | | | | | | | | | SectionSubscript -> Integer -> Expr = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | | | | | Designator -> DataRef -> Name = 'i' +!CHECK-PARSE-NEXT: | | | | | | | | | | Expr = '5_4' +!CHECK-PARSE-NEXT: | | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '5' +!CHECK-PARSE-NEXT: | | | | | | EndDoStmt -> +!CHECK-PARSE-NEXT: | | | | | OmpEndLoopDirective +!CHECK-PARSE-NEXT: | | | | | | OmpLoopDirective -> llvm::omp::Directive = tile +!CHECK-PARSE-NEXT: | | | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | | OmpEndLoopDirective +!CHECK-PARSE-NEXT: | | | | | OmpLoopDirective -> llvm::omp::Directive = unroll +!CHECK-PARSE-NEXT: | | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | OmpEndLoopDirective +!CHECK-PARSE-NEXT: | | | | OmpLoopDirective -> llvm::omp::Directive = do +!CHECK-PARSE-NEXT: | | | | OmpClauseList -> + +!CHECK-UNPARSE: SUBROUTINE loop_transformation_construct +!CHECK-UNPARSE-NEXT: IMPLICIT NONE +!CHECK-UNPARSE-NEXT: INTEGER :: i = 10_4 +!CHECK-UNPARSE-NEXT: INTEGER x +!CHECK-UNPARSE-NEXT: INTEGER y(i) +!CHECK-UNPARSE-NEXT: !$OMP DO +!CHECK-UNPARSE-NEXT: !$OMP UNROLL +!CHECK-UNPARSE-NEXT: !$OMP TILE +!CHECK-UNPARSE-NEXT: DO i=1_4,i +!CHECK-UNPARSE-NEXT: y(int(i,kind=8))=5_4*y(int(i,kind=8)) +!CHECK-UNPARSE-NEXT: END DO +!CHECK-UNPARSE-NEXT: !$OMP END TILE +!CHECK-UNPARSE-NEXT: !$OMP END UNROLL +!CHECK-UNPARSE-NEXT: !$OMP END DO +!CHECK-UNPARSE-NEXT: END SUBROUTINE diff --git 
a/flang/test/Semantics/OpenMP/loop-transformation-construct01.f90 b/flang/test/Semantics/OpenMP/loop-transformation-construct01.f90 new file mode 100644 index 0000000000000..f718efc32aabf --- /dev/null +++ b/flang/test/Semantics/OpenMP/loop-transformation-construct01.f90 @@ -0,0 +1,100 @@ +! Testing the Semantics of nested Loop Transformation Constructs + +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51 + +subroutine loop_transformation_construct1 + implicit none + + !$omp do + !ERROR: A DO loop must follow the UNROLL directive + !$omp unroll +end subroutine + +subroutine loop_transformation_construct2 + implicit none + integer :: i = 5 + integer :: y + integer :: v(i) + + !$omp do + !$omp tile + do x = 1, i + v(x) = x(x) * 2 + end do + !$omp end tile + !$omp end do + !ERROR: The END TILE directive must follow the DO loop associated with the loop construct + !$omp end tile +end subroutine + +subroutine loop_transformation_construct2 + implicit none + integer :: i = 5 + integer :: y + integer :: v(i) + + !$omp do + !ERROR: Only Loop Transformation Constructs or Loop Nests can be nested within Loop Constructs + !$omp parallel do + do x = 1, i + v(x) = x(x) * 2 + end do +end subroutine + +subroutine loop_transformation_construct3 + implicit none + integer :: i = 5 + integer :: y + integer :: v(i) + + !$omp do + do x = 1, i + v(x) = x(x) * 2 + end do + !ERROR: A DO loop must follow the TILE directive + !$omp tile +end subroutine + +subroutine loop_transformation_construct4 + implicit none + integer :: i = 5 + integer :: y + integer :: v(i) + + !$omp do + !ERROR: If a loop construct has been fully unrolled, it cannot then be tiled + !$omp tile + !$omp unroll full + do x = 1, i + v(x) = x(x) * 2 + end do +end subroutine + +subroutine loop_transformation_construct5 + implicit none + integer :: i = 5 + integer :: y + integer :: v(i) + + !$omp do + !ERROR: If a loop construct has been fully unrolled, it cannot then be tiled + !$omp tile + !$omp 
unroll + do x = 1, i + v(x) = x(x) * 2 + end do +end subroutine + +subroutine loop_transformation_construct6 + implicit none + integer :: i = 5 + integer :: y + integer :: v(i) + + !$omp do + !$omp tile + !$omp unroll partial(2) + do x = 1, i + v(x) = x(x) * 2 + end do +end subroutine From flang-commits at lists.llvm.org Tue Jul 1 01:09:50 2025 From: flang-commits at lists.llvm.org (Jack Styles via flang-commits) Date: Tue, 01 Jul 2025 01:09:50 -0700 (PDT) Subject: [flang-commits] [flang] [llvm] [mlir] [flang][OpenMP] Enable tiling (PR #143715) In-Reply-To: Message-ID: <686397ce.170a0220.26efc5.0b04@mx.google.com> ================ @@ -131,20 +134,55 @@ class CanonicalizationOfOmp { // Ignore compiler directives. if (GetConstructIf(*nextIt)) continue; + // Keep track of the loops to handle the end loop directives + llvm::SmallVector loops; + loops.push_back(&x); + if (auto *innerOmpLoop{GetOmpIf(*nextIt)}) { + auto &innerBeginDir{ + std::get(innerOmpLoop->t)}; + auto &innerDir{std::get(innerBeginDir.t)}; + if (innerDir.v == llvm::omp::Directive::OMPD_tile) { + auto &innerLoop = std::get< + std::optional>>( + loops.back()->t); + innerLoop = std::move(*innerOmpLoop); + // Retrieveing the address so that DoConstruct or inner loop can be + // set later. + loops.push_back(&(innerLoop.value().value())); + nextIt = block.erase(nextIt); + } + } if (auto *doCons{GetConstructIf(*nextIt)}) { if (doCons->GetLoopControl()) { - // move DoConstruct - std::get>(x.t) = + std::get>(loops.back()->t) = std::move(*doCons); nextIt = block.erase(nextIt); // try to match OmpEndLoopDirective - if (nextIt != block.end()) { + while (nextIt != block.end() && !loops.empty()) { ---------------- Stylie777 wrote: Will resolve as #145917 is merged. 
https://github.com/llvm/llvm-project/pull/143715 From flang-commits at lists.llvm.org Tue Jul 1 01:36:29 2025 From: flang-commits at lists.llvm.org (Benjamin Maxwell via flang-commits) Date: Tue, 01 Jul 2025 01:36:29 -0700 (PDT) Subject: [flang-commits] [clang] [flang] [Flang][AArch64][VecLib] Add libmvec support for Flang/AArch64 (PR #146453) In-Reply-To: Message-ID: <68639e0d.050a0220.11624b.2b0d@mx.google.com> https://github.com/MacDue commented: You may also want to change `Options.td`; currently the note about needing GLIBC 2.40 is only displayed for Clang, as it's under `HelpTextForVariants<[ClangOption, CC1Option]`: https://github.com/llvm/llvm-project/blob/cb806510914ed909b934d285062a9efb13b1cea4/clang/include/clang/Driver/Options.td#L3491-L3494 https://github.com/llvm/llvm-project/pull/146453 From flang-commits at lists.llvm.org Tue Jul 1 01:36:29 2025 From: flang-commits at lists.llvm.org (Benjamin Maxwell via flang-commits) Date: Tue, 01 Jul 2025 01:36:29 -0700 (PDT) Subject: [flang-commits] [clang] [flang] [Flang][AArch64][VecLib] Add libmvec support for Flang/AArch64 (PR #146453) In-Reply-To: Message-ID: <68639e0d.170a0220.e85c6.8e2c@mx.google.com> https://github.com/MacDue edited https://github.com/llvm/llvm-project/pull/146453 From flang-commits at lists.llvm.org Tue Jul 1 01:36:29 2025 From: flang-commits at lists.llvm.org (Benjamin Maxwell via flang-commits) Date: Tue, 01 Jul 2025 01:36:29 -0700 (PDT) Subject: [flang-commits] [clang] [flang] [Flang][AArch64][VecLib] Add libmvec support for Flang/AArch64 (PR #146453) In-Reply-To: Message-ID: <68639e0d.170a0220.378fe6.886b@mx.google.com> ================ @@ -34,6 +34,7 @@ page](https://llvm.org/releases/). * -floop-interchange is now recognized by flang. * -floop-interchange is enabled by default at -O2 and above. +* -fveclib=libmvec is supported for AArch64 (same as Flang/x86 and Clang/AArch64) ---------------- MacDue wrote: Is it worth noting the GLIBC 2.40 dependency here? 
https://github.com/llvm/llvm-project/pull/146453 From flang-commits at lists.llvm.org Tue Jul 1 01:36:50 2025 From: flang-commits at lists.llvm.org (Benjamin Maxwell via flang-commits) Date: Tue, 01 Jul 2025 01:36:50 -0700 (PDT) Subject: [flang-commits] [clang] [flang] [Flang][AArch64][VecLib] Add libmvec support for Flang/AArch64 (PR #146453) In-Reply-To: Message-ID: <68639e22.050a0220.1c69b6.0773@mx.google.com> https://github.com/MacDue edited https://github.com/llvm/llvm-project/pull/146453 From flang-commits at lists.llvm.org Tue Jul 1 01:37:00 2025 From: flang-commits at lists.llvm.org (Benjamin Maxwell via flang-commits) Date: Tue, 01 Jul 2025 01:37:00 -0700 (PDT) Subject: [flang-commits] [clang] [flang] [Flang][AArch64][VecLib] Add libmvec support for Flang/AArch64 (PR #146453) In-Reply-To: Message-ID: <68639e2c.170a0220.2b25d9.db81@mx.google.com> https://github.com/MacDue edited https://github.com/llvm/llvm-project/pull/146453 From flang-commits at lists.llvm.org Tue Jul 1 02:27:00 2025 From: flang-commits at lists.llvm.org (Tom Eccles via flang-commits) Date: Tue, 01 Jul 2025 02:27:00 -0700 (PDT) Subject: [flang-commits] [clang] [flang] [Flang][AArch64][VecLib] Add libmvec support for Flang/AArch64 (PR #146453) In-Reply-To: Message-ID: <6863a9e4.170a0220.22acaf.8c08@mx.google.com> https://github.com/tblah approved this pull request. LGTM with MacDue's comments. Thank you for contributing this! https://github.com/llvm/llvm-project/pull/146453 From flang-commits at lists.llvm.org Tue Jul 1 02:37:23 2025 From: flang-commits at lists.llvm.org (Tom Eccles via flang-commits) Date: Tue, 01 Jul 2025 02:37:23 -0700 (PDT) Subject: [flang-commits] [flang] [flang][OpenMP] Split check-omp-structure.cpp into smaller files, NFC (PR #146359) In-Reply-To: Message-ID: <6863ac53.050a0220.229596.1be9@mx.google.com> https://github.com/tblah commented: How bad is the impact on flang build time? 
From what I remember from when OpenMP lowering was split, the compilation time of each translation unit was so dominated by processing the headers that adding `n` translation units roughly increased compilation time of that part by a factor of `n`. But that aside I think this is a good idea from a code design perspective. Please could you add a comment at the top of each file documenting what should go in that file (just the text from the commit message would be fine). https://github.com/llvm/llvm-project/pull/146359 From flang-commits at lists.llvm.org Tue Jul 1 03:35:32 2025 From: flang-commits at lists.llvm.org (Tom Eccles via flang-commits) Date: Tue, 01 Jul 2025 03:35:32 -0700 (PDT) Subject: [flang-commits] [flang] [flang][OpenMP] Rewrite min/max with more than 2 arguments (PR #146423) In-Reply-To: Message-ID: <6863b9f4.050a0220.24e87f.3912@mx.google.com> https://github.com/tblah edited https://github.com/llvm/llvm-project/pull/146423 From flang-commits at lists.llvm.org Tue Jul 1 03:35:33 2025 From: flang-commits at lists.llvm.org (Tom Eccles via flang-commits) Date: Tue, 01 Jul 2025 03:35:33 -0700 (PDT) Subject: [flang-commits] [flang] [flang][OpenMP] Rewrite min/max with more than 2 arguments (PR #146423) In-Reply-To: Message-ID: <6863b9f5.050a0220.01ed.1992@mx.google.com> ================ @@ -41,6 +43,178 @@ namespace omp { using namespace Fortran::lower::omp; } +namespace { +// An example of a type that can be used to get the return value from +// the visitor: +// visitor(type_identity) -> result_type +using SomeArgType = evaluate::Type; + +struct GetProc + : public evaluate::Traverse { + using Result = const evaluate::ProcedureDesignator *; + using Base = evaluate::Traverse; + GetProc() : Base(*this) {} + + using Base::operator(); + + static Result Default() { return nullptr; } + + Result operator()(const evaluate::ProcedureDesignator &p) const { return &p; } + static Result Combine(Result a, Result b) { return a != nullptr ?
a : b; } +}; + +struct WithType { + WithType(const evaluate::DynamicType &t) : type(t) { + assert(type.category() != common::TypeCategory::Derived && + "Type cannot be a derived type"); + } + + template // + auto visit(VisitorTy &&visitor) const + -> std::invoke_result_t { + switch (type.category()) { + case common::TypeCategory::Integer: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Unsigned: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Real: + switch (type.kind()) { + case 2: + return visitor(llvm::type_identity>{}); + case 3: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 10: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Complex: + switch (type.kind()) { + case 2: + return visitor(llvm::type_identity>{}); + case 3: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 10: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Logical: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return 
visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Character: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Derived: + break; + } + llvm_unreachable("Unhandled type"); + } + + const evaluate::DynamicType &type; + +private: + // Shorter names. + static constexpr auto Character = common::TypeCategory::Character; + static constexpr auto Complex = common::TypeCategory::Complex; + static constexpr auto Derived = common::TypeCategory::Derived; ---------------- tblah wrote: Unused https://github.com/llvm/llvm-project/pull/146423 From flang-commits at lists.llvm.org Tue Jul 1 03:35:33 2025 From: flang-commits at lists.llvm.org (Tom Eccles via flang-commits) Date: Tue, 01 Jul 2025 03:35:33 -0700 (PDT) Subject: [flang-commits] [flang] [flang][OpenMP] Rewrite min/max with more than 2 arguments (PR #146423) In-Reply-To: Message-ID: <6863b9f5.170a0220.a10d7.90e9@mx.google.com> ================ @@ -0,0 +1,24 @@ +!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 %s -o - | FileCheck %s + +! Check that the presence tests are done outside of the atomic update +! construct. 
+ +!CHECK: %[[IS_PRESENT:[a-z0-9]+]] = fir.is_present +!CHECK: %[[IF_VAL:[a-z0-9]+]] = fir.if %[[IS_PRESENT]] -> (f32) { +!CHECK: fir.result {{.*}} : f32 +!CHECK: } else { +!CHECK: fir.result {{.*}} : f32 +!CHECK: } +!CHECK: omp.atomic.update {{.*}} : !fir.ref { +!CHECK: ^bb0(%[[ARG:[a-z0-9]+]]: f32): +!CHECK: %[[V10:[a-z0-9]+]] = arith.cmpf ogt, %[[ARG]], %[[IF_VAL]] +!CHECK: %[[V11:[a-z0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[IF_VAL]] +!CHECK: omp.yield(%[[V11]] : f32) +!CHECK: } + +subroutine f00(a, x, y) + real :: a + real, optional :: x, y + !$omp atomic update + a = max(x, a, y) ---------------- tblah wrote: Please also add a test for `min` https://github.com/llvm/llvm-project/pull/146423 From flang-commits at lists.llvm.org Tue Jul 1 03:35:33 2025 From: flang-commits at lists.llvm.org (Tom Eccles via flang-commits) Date: Tue, 01 Jul 2025 03:35:33 -0700 (PDT) Subject: [flang-commits] [flang] [flang][OpenMP] Rewrite min/max with more than 2 arguments (PR #146423) In-Reply-To: Message-ID: <6863b9f5.170a0220.1f72e0.8985@mx.google.com> https://github.com/tblah commented: This is really nice. Just minor comments. https://github.com/llvm/llvm-project/pull/146423 From flang-commits at lists.llvm.org Tue Jul 1 03:35:33 2025 From: flang-commits at lists.llvm.org (Tom Eccles via flang-commits) Date: Tue, 01 Jul 2025 03:35:33 -0700 (PDT) Subject: [flang-commits] [flang] [flang][OpenMP] Rewrite min/max with more than 2 arguments (PR #146423) In-Reply-To: Message-ID: <6863b9f5.170a0220.e1fcc.9579@mx.google.com> ================ @@ -0,0 +1,24 @@ +!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 %s -o - | FileCheck %s + +! Check that the presence tests are done outside of the atomic update +! construct. 
+ +!CHECK: %[[IS_PRESENT:[a-z0-9]+]] = fir.is_present +!CHECK: %[[IF_VAL:[a-z0-9]+]] = fir.if %[[IS_PRESENT]] -> (f32) { +!CHECK: fir.result {{.*}} : f32 ---------------- tblah wrote: Please could you add checks for the non-atomic max operation here so that it is clearer where that part goes https://github.com/llvm/llvm-project/pull/146423 From flang-commits at lists.llvm.org Tue Jul 1 03:35:35 2025 From: flang-commits at lists.llvm.org (Tom Eccles via flang-commits) Date: Tue, 01 Jul 2025 03:35:35 -0700 (PDT) Subject: [flang-commits] [flang] [flang][OpenMP] Rewrite min/max with more than 2 arguments (PR #146423) In-Reply-To: Message-ID: <6863b9f7.170a0220.3d3cb2.0084@mx.google.com> ================ @@ -350,10 +603,28 @@ genAtomicUpdate(lower::AbstractConverter &converter, mlir::Type atomType = fir::unwrapRefType(atomAddr.getType()); // This must exist by now. - semantics::SomeExpr input = *evaluate::GetConvertInput(assign.rhs); - std::vector args = - evaluate::GetTopLevelOperation(input).second; + semantics::SomeExpr rhs = assign.rhs; + semantics::SomeExpr input = *evaluate::GetConvertInput(rhs); + auto [opcode, args] = evaluate::GetTopLevelOperation(input); assert(!args.empty() && "Update operation without arguments"); + + const semantics::SomeExpr *atomArg = [&]() { + for (const semantics::SomeExpr &e : args) { ---------------- tblah wrote: ``` /home/USER/llvm-project/flang/lib/Lower/OpenMP/Atomic.cpp:612:41: error: captured structured bindings are a C++20 extension [-Werror,-Wc++20-extensions] 612 | for (const semantics::SomeExpr &e : args) { | ^ /home/USER/llvm-project/flang/lib/Lower/OpenMP/Atomic.cpp:608:17: note: 'args' declared here 608 | auto [opcode, args] = evaluate::GetTopLevelOperation(input); | ^ 1 error generated. 
``` With clang 19 https://github.com/llvm/llvm-project/pull/146423 From flang-commits at lists.llvm.org Tue Jul 1 03:35:35 2025 From: flang-commits at lists.llvm.org (Tom Eccles via flang-commits) Date: Tue, 01 Jul 2025 03:35:35 -0700 (PDT) Subject: [flang-commits] [flang] [flang][OpenMP] Rewrite min/max with more than 2 arguments (PR #146423) In-Reply-To: Message-ID: <6863b9f7.170a0220.2c840b.789c@mx.google.com> ================ @@ -237,6 +411,85 @@ makeMemOrderAttr(lower::AbstractConverter &converter, return nullptr; } +static bool replaceArgs(semantics::SomeExpr &expr, + evaluate::ActualArguments &&newArgs) { + return ArgumentReplacer(std::move(newArgs))(expr); +} + +static semantics::SomeExpr makeCall(const evaluate::DynamicType &type, + const evaluate::ProcedureDesignator &proc, + const evaluate::ActualArguments &args) { + return WithType(type).visit([&](auto &&s) -> semantics::SomeExpr { + using Type = typename llvm::remove_cvref_t::type; + return evaluate::AsGenericExpr( + evaluate::FunctionRef(AsRvalue(proc), AsRvalue(args))); + }); +} + +static const evaluate::ProcedureDesignator & +getProcedureDesignator(const semantics::SomeExpr &call) { + const evaluate::ProcedureDesignator *proc = GetProc{}(call); + assert(proc && "Call has no procedure designator"); + return *proc; +} + +static semantics::SomeExpr // +genReducedMinMax(const semantics::SomeExpr &orig, + const semantics::SomeExpr *atomArg, + const std::vector &args) { + // Take a list of arguments to a min/max operation, e.g. [a0, a1, ...] + // One of the a_i's, say a_t, must be atom (or a convert of atom). ---------------- tblah wrote: ultra-nit ```suggestion // One of the a_i's, say a_t, must be atomArg (or a convert of atomArg). 
``` https://github.com/llvm/llvm-project/pull/146423 From flang-commits at lists.llvm.org Tue Jul 1 04:25:43 2025 From: flang-commits at lists.llvm.org (KAWASHIMA Takahiro via flang-commits) Date: Tue, 01 Jul 2025 04:25:43 -0700 (PDT) Subject: [flang-commits] [clang] [flang] [Flang][AArch64][VecLib] Add libmvec support for Flang/AArch64 (PR #146453) In-Reply-To: Message-ID: <6863c5b7.170a0220.3a565.7971@mx.google.com> https://github.com/kawashima-fj updated https://github.com/llvm/llvm-project/pull/146453 >From 158eb6d00a123e5d427b4aa52b37824ff3e20840 Mon Sep 17 00:00:00 2001 From: KAWASHIMA Takahiro Date: Wed, 25 Jun 2025 16:34:21 +0900 Subject: [PATCH 1/2] [Flang][AArch64][VecLib] Add libmvec support for Flang/AArch64 `-fveclib=libmvec` for AArch64 (NEON and SVE) in Clang was supported by #143696. This patch does the same for Flang. Vector functions defined in `libmvec` are used for the following Fortran operator and functions currently. - Power operator (`**`) - Fortran intrinsic functions listed below for `real(kind=4)` and `real(kind=8)` (including their corresponding specific intrinsic functions) - Fortran intrinsic functions which are expanded using functions listed below (for example, `sin` for `complex(kind=8)`) ``` sin tan cos asin acos atan (both atan(x) and atan(y, x)) atan2 cosh tanh asinh acosh atanh erf erfc exp log log10 ``` As with Clang/AArch64, glibc 2.40 or higher is required to use all these functions.
--- clang/lib/Driver/ToolChains/Flang.cpp | 9 ++++++++- flang/docs/ReleaseNotes.md | 1 + flang/test/Driver/fveclib-codegen.f90 | 4 ++++ flang/test/Driver/fveclib.f90 | 3 ++- 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index e4e321ba1e195..75ba2af543c7a 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -484,11 +484,18 @@ void Flang::addTargetOptions(const ArgList &Args, Triple.getArch() != llvm::Triple::x86_64) D.Diag(diag::err_drv_unsupported_opt_for_target) << Name << Triple.getArchName(); - } else if (Name == "libmvec" || Name == "AMDLIBM") { + } else if (Name == "AMDLIBM") { if (Triple.getArch() != llvm::Triple::x86 && Triple.getArch() != llvm::Triple::x86_64) D.Diag(diag::err_drv_unsupported_opt_for_target) << Name << Triple.getArchName(); + } else if (Name == "libmvec") { + if (Triple.getArch() != llvm::Triple::x86 && + Triple.getArch() != llvm::Triple::x86_64 && + Triple.getArch() != llvm::Triple::aarch64 && + Triple.getArch() != llvm::Triple::aarch64_be) + D.Diag(diag::err_drv_unsupported_opt_for_target) + << Name << Triple.getArchName(); } else if (Name == "SLEEF" || Name == "ArmPL") { if (Triple.getArch() != llvm::Triple::aarch64 && Triple.getArch() != llvm::Triple::aarch64_be) diff --git a/flang/docs/ReleaseNotes.md b/flang/docs/ReleaseNotes.md index 35da8323e0a10..bd3a215323de2 100644 --- a/flang/docs/ReleaseNotes.md +++ b/flang/docs/ReleaseNotes.md @@ -34,6 +34,7 @@ page](https://llvm.org/releases/). * -floop-interchange is now recognized by flang. * -floop-interchange is enabled by default at -O2 and above. 
+* -fveclib=libmvec is supported for AArch64 (same as Flang/x86 and Clang/AArch64) ## Windows Support diff --git a/flang/test/Driver/fveclib-codegen.f90 b/flang/test/Driver/fveclib-codegen.f90 index 4cbb1e284f18e..88b31da40167e 100644 --- a/flang/test/Driver/fveclib-codegen.f90 +++ b/flang/test/Driver/fveclib-codegen.f90 @@ -1,6 +1,8 @@ ! test that -fveclib= is passed to the backend ! RUN: %if aarch64-registered-target %{ %flang -S -Ofast -target aarch64-unknown-linux-gnu -fveclib=SLEEF -o - %s | FileCheck %s --check-prefix=SLEEF %} ! RUN: %if x86-registered-target %{ %flang -S -Ofast -target x86_64-unknown-linux-gnu -fveclib=libmvec -o - %s | FileCheck %s %} +! RUN: %if aarch64-registered-target %{ %flang -S -Ofast -target aarch64-unknown-linux-gnu -fveclib=libmvec -march=armv8.2-a+nosve -o - %s | FileCheck %s --check-prefix=LIBMVEC-AARCH64-NEON %} +! RUN: %if aarch64-registered-target %{ %flang -S -Ofast -target aarch64-unknown-linux-gnu -fveclib=libmvec -march=armv8.2-a+sve -o - %s | FileCheck %s --check-prefix=LIBMVEC-AARCH64-SVE %} ! RUN: %if x86-registered-target %{ %flang -S -O3 -ffast-math -target x86_64-unknown-linux-gnu -fveclib=AMDLIBM -o - %s | FileCheck %s --check-prefix=AMDLIBM %} ! RUN: %flang -S -Ofast -fveclib=NoLibrary -o - %s | FileCheck %s --check-prefix=NOLIB @@ -11,6 +13,8 @@ subroutine sb(a, b) ! check that we used a vectorized call to powf() ! CHECK: _ZGVbN4vv_powf ! SLEEF: _ZGVnN4vv_powf +! LIBMVEC-AARCH64-NEON: _ZGVnN4vv_powf +! LIBMVEC-AARCH64-SVE: _ZGVsMxvv_powf ! AMDLIBM: amd_vrs4_powf ! NOLIB: powf a(i) = a(i) ** b(i) diff --git a/flang/test/Driver/fveclib.f90 b/flang/test/Driver/fveclib.f90 index 431a4bfc02522..d21e85e486f8d 100644 --- a/flang/test/Driver/fveclib.f90 +++ b/flang/test/Driver/fveclib.f90 @@ -1,6 +1,7 @@ ! RUN: %flang -### -c -fveclib=none %s 2>&1 | FileCheck -check-prefix CHECK-NOLIB %s ! RUN: %flang -### -c -fveclib=Accelerate %s 2>&1 | FileCheck -check-prefix CHECK-ACCELERATE %s ! 
RUN: %flang -### -c --target=x86_64-unknown-linux-gnu -fveclib=libmvec %s 2>&1 | FileCheck -check-prefix CHECK-libmvec %s +! RUN: %flang -### -c --target=aarch64-unknown-linux-gnu -fveclib=libmvec %s 2>&1 | FileCheck -check-prefix CHECK-libmvec %s ! RUN: %flang -### -c -fveclib=MASSV %s 2>&1 | FileCheck -check-prefix CHECK-MASSV %s ! RUN: %flang -### -c -fveclib=Darwin_libsystem_m %s 2>&1 | FileCheck -check-prefix CHECK-DARWIN_LIBSYSTEM_M %s ! RUN: %flang -### -c --target=aarch64-none-none -fveclib=SLEEF %s 2>&1 | FileCheck -check-prefix CHECK-SLEEF %s @@ -23,7 +24,7 @@ ! RUN: not %flang --target=x86-none-none -c -fveclib=SLEEF %s 2>&1 | FileCheck -check-prefix CHECK-ERROR %s ! RUN: not %flang --target=x86-none-none -c -fveclib=ArmPL %s 2>&1 | FileCheck -check-prefix CHECK-ERROR %s -! RUN: not %flang --target=aarch64-none-none -c -fveclib=libmvec %s 2>&1 | FileCheck -check-prefix CHECK-ERROR %s +! RUN: not %flang --target=riscv64-none-none -c -fveclib=libmvec %s 2>&1 | FileCheck -check-prefix CHECK-ERROR %s ! RUN: not %flang --target=aarch64-none-none -c -fveclib=SVML %s 2>&1 | FileCheck -check-prefix CHECK-ERROR %s ! RUN: not %flang --target=aarch64-none-none -c -fveclib=AMDLIBM %s 2>&1 | FileCheck -check-prefix CHECK-ERROR %s ! 
CHECK-ERROR: unsupported option {{.*}} for target >From 943347688a6fd43be367143aa1b7a19587da2573 Mon Sep 17 00:00:00 2001 From: KAWASHIMA Takahiro Date: Tue, 1 Jul 2025 20:13:35 +0900 Subject: [PATCH 2/2] Mention the required GLIBC version in help and release note --- clang/include/clang/Driver/Options.td | 3 ++- flang/docs/ReleaseNotes.md | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 9911d752966e3..b4b2ab5f916df 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3487,7 +3487,8 @@ def fno_experimental_isel : Flag<["-"], "fno-experimental-isel">, Group; def fveclib : Joined<["-"], "fveclib=">, Group, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, - HelpText<"Use the given vector functions library">, + HelpText<"Use the given vector functions library.\n" + " Note: -fveclib=libmvec on AArch64 requires GLIBC 2.40 or newer.">, HelpTextForVariants<[ClangOption, CC1Option], "Use the given vector functions library.\n" " Note: -fveclib={ArmPL,SLEEF,libmvec} implies -fno-math-errno.\n" diff --git a/flang/docs/ReleaseNotes.md b/flang/docs/ReleaseNotes.md index bd3a215323de2..b2dbbcb5630f4 100644 --- a/flang/docs/ReleaseNotes.md +++ b/flang/docs/ReleaseNotes.md @@ -34,7 +34,8 @@ page](https://llvm.org/releases/). * -floop-interchange is now recognized by flang. * -floop-interchange is enabled by default at -O2 and above. 
-* -fveclib=libmvec is supported for AArch64 (same as Flang/x86 and Clang/AArch64) +* -fveclib=libmvec is supported for AArch64 (same as Flang/x86 and + Clang/AArch64) (requires GLIBC 2.40 or newer) ## Windows Support From flang-commits at lists.llvm.org Tue Jul 1 04:27:36 2025 From: flang-commits at lists.llvm.org (KAWASHIMA Takahiro via flang-commits) Date: Tue, 01 Jul 2025 04:27:36 -0700 (PDT) Subject: [flang-commits] [clang] [flang] [Flang][AArch64][VecLib] Add libmvec support for Flang/AArch64 (PR #146453) In-Reply-To: Message-ID: <6863c628.050a0220.2c8367.16ea@mx.google.com> ================ @@ -34,6 +34,7 @@ page](https://llvm.org/releases/). * -floop-interchange is now recognized by flang. * -floop-interchange is enabled by default at -O2 and above. +* -fveclib=libmvec is supported for AArch64 (same as Flang/x86 and Clang/AArch64) ---------------- kawashima-fj wrote: Thanks. I added the GLIBC version information in the help message and the release note. https://github.com/llvm/llvm-project/pull/146453 From flang-commits at lists.llvm.org Tue Jul 1 05:34:53 2025 From: flang-commits at lists.llvm.org (Benjamin Maxwell via flang-commits) Date: Tue, 01 Jul 2025 05:34:53 -0700 (PDT) Subject: [flang-commits] [clang] [flang] [Flang][AArch64][VecLib] Add libmvec support for Flang/AArch64 (PR #146453) In-Reply-To: Message-ID: <6863d5ed.170a0220.232821.9c9d@mx.google.com> https://github.com/MacDue approved this pull request. Thanks! LGTM! 
https://github.com/llvm/llvm-project/pull/146453 From flang-commits at lists.llvm.org Tue Jul 1 06:02:05 2025 From: flang-commits at lists.llvm.org (Krzysztof Parzyszek via flang-commits) Date: Tue, 01 Jul 2025 06:02:05 -0700 (PDT) Subject: [flang-commits] [flang] [flang][OpenMP] Rewrite min/max with more than 2 arguments (PR #146423) In-Reply-To: Message-ID: <6863dc4d.170a0220.243cf1.71e0@mx.google.com> https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/146423 >From 4cfcf45d8250d648bd4ae0a9110b5034d2495149 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 20 Jun 2025 06:48:18 -0500 Subject: [PATCH 1/3] [flang][OpenMP] Rewrite min/max with more than 2 arguments Given an atomic operation `w = max(w, x1, x2, ...)` rewrite it as `w = max(w, max(x1, x2, ...))`. This will avoid unnecessary non-atomic comparisons inside of the atomic operation (min/max are expanded inline). In particular, if some of the x_i's are optional dummy parameters in the containing function, this will avoid any presence tests within the atomic operation. 
Fixes https://github.com/llvm/llvm-project/issues/144838 --- flang/lib/Lower/OpenMP/Atomic.cpp | 279 +++++++++++++++++- flang/test/Lower/OpenMP/atomic-update.f90 | 12 +- .../Lower/OpenMP/max-optional-parameters.f90 | 24 ++ 3 files changed, 302 insertions(+), 13 deletions(-) create mode 100644 flang/test/Lower/OpenMP/max-optional-parameters.f90 diff --git a/flang/lib/Lower/OpenMP/Atomic.cpp b/flang/lib/Lower/OpenMP/Atomic.cpp index 33a743f8f9dda..b7a223eff80c5 100644 --- a/flang/lib/Lower/OpenMP/Atomic.cpp +++ b/flang/lib/Lower/OpenMP/Atomic.cpp @@ -11,6 +11,8 @@ #include "flang/Evaluate/expression.h" #include "flang/Evaluate/fold.h" #include "flang/Evaluate/tools.h" +#include "flang/Evaluate/traverse.h" +#include "flang/Evaluate/type.h" #include "flang/Lower/AbstractConverter.h" #include "flang/Lower/PFTBuilder.h" #include "flang/Lower/StatementContext.h" @@ -41,6 +43,178 @@ namespace omp { using namespace Fortran::lower::omp; } +namespace { +// An example of a type that can be used to get the return value from +// the visitor: +// visitor(type_identity) -> result_type +using SomeArgType = evaluate::Type; + +struct GetProc + : public evaluate::Traverse { + using Result = const evaluate::ProcedureDesignator *; + using Base = evaluate::Traverse; + GetProc() : Base(*this) {} + + using Base::operator(); + + static Result Default() { return nullptr; } + + Result operator()(const evaluate::ProcedureDesignator &p) const { return &p; } + static Result Combine(Result a, Result b) { return a != nullptr ? 
a : b; } +}; + +struct WithType { + WithType(const evaluate::DynamicType &t) : type(t) { + assert(type.category() != common::TypeCategory::Derived && + "Type cannot be a derived type"); + } + + template // + auto visit(VisitorTy &&visitor) const + -> std::invoke_result_t { + switch (type.category()) { + case common::TypeCategory::Integer: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Unsigned: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Real: + switch (type.kind()) { + case 2: + return visitor(llvm::type_identity>{}); + case 3: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 10: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Complex: + switch (type.kind()) { + case 2: + return visitor(llvm::type_identity>{}); + case 3: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 10: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Logical: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return 
visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Character: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Derived: + break; + } + llvm_unreachable("Unhandled type"); + } + + const evaluate::DynamicType &type; + +private: + // Shorter names. + static constexpr auto Character = common::TypeCategory::Character; + static constexpr auto Complex = common::TypeCategory::Complex; + static constexpr auto Derived = common::TypeCategory::Derived; + static constexpr auto Integer = common::TypeCategory::Integer; + static constexpr auto Logical = common::TypeCategory::Logical; + static constexpr auto Real = common::TypeCategory::Real; + static constexpr auto Unsigned = common::TypeCategory::Unsigned; +}; + +template > +U AsRvalue(T &t) { + U copy{t}; + return std::move(copy); +} + +template +T &&AsRvalue(T &&t) { + return std::move(t); +} + +struct ArgumentReplacer + : public evaluate::Traverse { + using Base = evaluate::Traverse; + using Result = bool; + + Result Default() const { return false; } + + ArgumentReplacer(evaluate::ActualArguments &&newArgs) + : Base(*this), args_(std::move(newArgs)) {} + + using Base::operator(); + + template + Result operator()(const evaluate::FunctionRef &x) { + assert(!done_); + auto &mut = const_cast &>(x); + mut.arguments() = args_; + done_ = true; + return true; + } + + Result Combine(Result &&a, Result &&b) { return a || b; } + +private: + bool done_{false}; + evaluate::ActualArguments &&args_; +}; +} // namespace + [[maybe_unused]] static void dumpAtomicAnalysis(const parser::OpenMPAtomicConstruct::Analysis &analysis) { auto whatStr = [](int k) { @@ -237,6 +411,85 @@ makeMemOrderAttr(lower::AbstractConverter &converter, return nullptr; } +static bool replaceArgs(semantics::SomeExpr &expr, + evaluate::ActualArguments &&newArgs) { + return 
ArgumentReplacer(std::move(newArgs))(expr); +} + +static semantics::SomeExpr makeCall(const evaluate::DynamicType &type, + const evaluate::ProcedureDesignator &proc, + const evaluate::ActualArguments &args) { + return WithType(type).visit([&](auto &&s) -> semantics::SomeExpr { + using Type = typename llvm::remove_cvref_t::type; + return evaluate::AsGenericExpr( + evaluate::FunctionRef(AsRvalue(proc), AsRvalue(args))); + }); +} + +static const evaluate::ProcedureDesignator & +getProcedureDesignator(const semantics::SomeExpr &call) { + const evaluate::ProcedureDesignator *proc = GetProc{}(call); + assert(proc && "Call has no procedure designator"); + return *proc; +} + +static semantics::SomeExpr // +genReducedMinMax(const semantics::SomeExpr &orig, + const semantics::SomeExpr *atomArg, + const std::vector &args) { + // Take a list of arguments to a min/max operation, e.g. [a0, a1, ...] + // One of the a_i's, say a_t, must be atom (or a convert of atom). + // Generate tmp = min/max(a0, a1, ... [except a_t]). Then generate + // call = min/max(a_t, tmp). + // Return "call". + + // The min/max intrinsics have 2 mandatory arguments, the rest is optional. + // Make sure that the "tmp = min/max(...)" doesn't promote an optional + // argument to a non-optional position. This could happen if a_t is at + // position 0 or 1. + if (args.size() <= 2) + return orig; + + evaluate::ActualArguments nonAtoms; + + auto AsActual = [](const semantics::SomeExpr &x) { + semantics::SomeExpr copy = x; + return evaluate::ActualArgument(std::move(copy)); + }; + // Semantic checks guarantee that the "atom" shows exactly once in the + // argument list (with potential conversions around it). + // For the first two (non-optional) arguments, if "atom" is among them, + // replace it with another occurrence of the other non-optional argument. + if (atomArg == &args[0]) { + // (atom, x, y...) -> (x, x, y...) 
+ nonAtoms.push_back(AsActual(args[1])); + nonAtoms.push_back(AsActual(args[1])); + } else if (atomArg == &args[1]) { + // (x, atom, y...) -> (x, x, y...) + nonAtoms.push_back(AsActual(args[0])); + nonAtoms.push_back(AsActual(args[0])); + } else { + // (x, y, z...) -> unchanged + nonAtoms.push_back(AsActual(args[0])); + nonAtoms.push_back(AsActual(args[1])); + } + + // The rest of arguments are optional, so we can just skip "atom". + for (size_t i = 2, e = args.size(); i != e; ++i) { + if (atomArg != &args[i]) + nonAtoms.push_back(AsActual(args[i])); + } + + // The type of the intermediate min/max is the same as the type of its + // arguments, which may be different from the type of the original + // expression. The original expression may have additional converts. + auto tmp = + makeCall(*atomArg->GetType(), getProcedureDesignator(orig), nonAtoms); + semantics::SomeExpr call = orig; + replaceArgs(call, {AsActual(*atomArg), AsActual(tmp)}); + return call; +} + static mlir::Operation * // genAtomicRead(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, mlir::Location loc, @@ -350,10 +603,28 @@ genAtomicUpdate(lower::AbstractConverter &converter, mlir::Type atomType = fir::unwrapRefType(atomAddr.getType()); // This must exist by now.
- semantics::SomeExpr input = *evaluate::GetConvertInput(assign.rhs); - std::vector args = - evaluate::GetTopLevelOperation(input).second; + semantics::SomeExpr rhs = assign.rhs; + semantics::SomeExpr input = *evaluate::GetConvertInput(rhs); + auto [opcode, args] = evaluate::GetTopLevelOperation(input); assert(!args.empty() && "Update operation without arguments"); + + const semantics::SomeExpr *atomArg = [&]() { + for (const semantics::SomeExpr &e : args) { + if (evaluate::IsSameOrConvertOf(e, atom)) + return &e; + } + llvm_unreachable("Atomic variable not in argument list"); + }(); + + if (opcode == evaluate::operation::Operator::Min || + opcode == evaluate::operation::Operator::Max) { + // Min and max operations are expanded inline, so reduce them to + // operations with exactly two (non-optional) arguments. + rhs = genReducedMinMax(rhs, atomArg, args); + input = *evaluate::GetConvertInput(rhs); + std::tie(opcode, args) = evaluate::GetTopLevelOperation(input); + atomArg = nullptr; // No longer valid. 
+ } for (auto &arg : args) { if (!evaluate::IsSameOrConvertOf(arg, atom)) { mlir::Value val = fir::getBase(converter.genExprValue(arg, naCtx, &loc)); @@ -372,7 +643,7 @@ genAtomicUpdate(lower::AbstractConverter &converter, converter.overrideExprValues(&overrides); mlir::Value updated = - fir::getBase(converter.genExprValue(assign.rhs, stmtCtx, &loc)); + fir::getBase(converter.genExprValue(rhs, stmtCtx, &loc)); mlir::Value converted = builder.createConvert(loc, atomType, updated); builder.create(loc, converted); converter.resetExprOverrides(); diff --git a/flang/test/Lower/OpenMP/atomic-update.f90 b/flang/test/Lower/OpenMP/atomic-update.f90 index 3f840acefa6e8..f88bbea6fca85 100644 --- a/flang/test/Lower/OpenMP/atomic-update.f90 +++ b/flang/test/Lower/OpenMP/atomic-update.f90 @@ -107,8 +107,6 @@ program OmpAtomicUpdate !CHECK: omp.atomic.update memory_order(relaxed) %[[VAL_Y_DECLARE]]#0 : !fir.ref { !CHECK: ^bb0(%[[ARG:.*]]: i32): !CHECK: {{.*}} = arith.cmpi sgt, %[[ARG]], {{.*}} : i32 -!CHECK: {{.*}} = arith.select {{.*}}, %[[ARG]], {{.*}} : i32 -!CHECK: {{.*}} = arith.cmpi sgt, {{.*}} !CHECK: %[[TEMP:.*]] = arith.select {{.*}} : i32 !CHECK: omp.yield(%[[TEMP]] : i32) !CHECK: } @@ -177,13 +175,9 @@ program OmpAtomicUpdate !CHECK: %[[VAL_Z_LOADED:.*]] = fir.load %[[VAL_Z_DECLARE]]#0 : !fir.ref !CHECK: omp.atomic.update %[[VAL_W_DECLARE]]#0 : !fir.ref { !CHECK: ^bb0(%[[ARG_W:.*]]: i32): -!CHECK: %[[WX_CMP:.*]] = arith.cmpi sgt, %[[ARG_W]], %[[VAL_X_LOADED]] : i32 -!CHECK: %[[WX_MIN:.*]] = arith.select %[[WX_CMP]], %[[ARG_W]], %[[VAL_X_LOADED]] : i32 -!CHECK: %[[WXY_CMP:.*]] = arith.cmpi sgt, %[[WX_MIN]], %[[VAL_Y_LOADED]] : i32 -!CHECK: %[[WXY_MIN:.*]] = arith.select %[[WXY_CMP]], %[[WX_MIN]], %[[VAL_Y_LOADED]] : i32 -!CHECK: %[[WXYZ_CMP:.*]] = arith.cmpi sgt, %[[WXY_MIN]], %[[VAL_Z_LOADED]] : i32 -!CHECK: %[[WXYZ_MIN:.*]] = arith.select %[[WXYZ_CMP]], %[[WXY_MIN]], %[[VAL_Z_LOADED]] : i32 -!CHECK: omp.yield(%[[WXYZ_MIN]] : i32) +!CHECK: %[[W_CMP:.*]] = arith.cmpi 
sgt, %[[ARG_W]], {{.*}} : i32 +!CHECK: %[[WXYZ_MAX:.*]] = arith.select %[[W_CMP]], %[[ARG_W]], {{.*}} : i32 +!CHECK: omp.yield(%[[WXYZ_MAX]] : i32) !CHECK: } !$omp atomic update w = max(w,x,y,z) diff --git a/flang/test/Lower/OpenMP/max-optional-parameters.f90 b/flang/test/Lower/OpenMP/max-optional-parameters.f90 new file mode 100644 index 0000000000000..2bb6b110cb0ef --- /dev/null +++ b/flang/test/Lower/OpenMP/max-optional-parameters.f90 @@ -0,0 +1,24 @@ +!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 %s -o - | FileCheck %s + +! Check that the presence tests are done outside of the atomic update +! construct. + +!CHECK: %[[IS_PRESENT:[a-z0-9]+]] = fir.is_present +!CHECK: %[[IF_VAL:[a-z0-9]+]] = fir.if %[[IS_PRESENT]] -> (f32) { +!CHECK: fir.result {{.*}} : f32 +!CHECK: } else { +!CHECK: fir.result {{.*}} : f32 +!CHECK: } +!CHECK: omp.atomic.update {{.*}} : !fir.ref { +!CHECK: ^bb0(%[[ARG:[a-z0-9]+]]: f32): +!CHECK: %[[V10:[a-z0-9]+]] = arith.cmpf ogt, %[[ARG]], %[[IF_VAL]] +!CHECK: %[[V11:[a-z0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[IF_VAL]] +!CHECK: omp.yield(%[[V11]] : f32) +!CHECK: } + +subroutine f00(a, x, y) + real :: a + real, optional :: x, y + !$omp atomic update + a = max(x, a, y) +end >From 4302ed81021028baec7427028562802a516ea511 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 1 Jul 2025 07:38:45 -0500 Subject: [PATCH 2/3] Address review comments --- flang/lib/Lower/OpenMP/Atomic.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/flang/lib/Lower/OpenMP/Atomic.cpp b/flang/lib/Lower/OpenMP/Atomic.cpp index b7a223eff80c5..2ab91b239a3cc 100644 --- a/flang/lib/Lower/OpenMP/Atomic.cpp +++ b/flang/lib/Lower/OpenMP/Atomic.cpp @@ -157,6 +157,7 @@ struct WithType { } break; case common::TypeCategory::Derived: + (void)Derived; break; } llvm_unreachable("Unhandled type"); @@ -438,7 +439,7 @@ genReducedMinMax(const semantics::SomeExpr &orig, const semantics::SomeExpr *atomArg, const std::vector &args) { // Take 
a list of arguments to a min/max operation, e.g. [a0, a1, ...] - // One of the a_i's, say a_t, must be atom (or a convert of atom). + // One of the a_i's, say a_t, must be atomArg. // Generate tmp = min/max(a0, a1, ... [except a_t]). Then generate // call = min/max(a_t, tmp). // Return "call". @@ -608,13 +609,14 @@ genAtomicUpdate(lower::AbstractConverter &converter, auto [opcode, args] = evaluate::GetTopLevelOperation(input); assert(!args.empty() && "Update operation without arguments"); - const semantics::SomeExpr *atomArg = [&]() { + // Pass args as an argument to avoid capturing a structured binding. + const semantics::SomeExpr *atomArg = [&](auto &args) { for (const semantics::SomeExpr &e : args) { if (evaluate::IsSameOrConvertOf(e, atom)) return &e; } llvm_unreachable("Atomic variable not in argument list"); - }(); + }(args); if (opcode == evaluate::operation::Operator::Min || opcode == evaluate::operation::Operator::Max) { >From 16df2e0eb775fce156db6f5fc62509026a0b2bc5 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 1 Jul 2025 08:01:23 -0500 Subject: [PATCH 3/3] Check for non-atomic operations, add min --- .../Lower/OpenMP/max-optional-parameters.f90 | 58 ++++++++++++++++--- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/flang/test/Lower/OpenMP/max-optional-parameters.f90 b/flang/test/Lower/OpenMP/max-optional-parameters.f90 index 2bb6b110cb0ef..57a432eeeecec 100644 --- a/flang/test/Lower/OpenMP/max-optional-parameters.f90 +++ b/flang/test/Lower/OpenMP/max-optional-parameters.f90 @@ -3,16 +3,27 @@ ! Check that the presence tests are done outside of the atomic update ! construct. 
-!CHECK: %[[IS_PRESENT:[a-z0-9]+]] = fir.is_present -!CHECK: %[[IF_VAL:[a-z0-9]+]] = fir.if %[[IS_PRESENT]] -> (f32) { -!CHECK: fir.result {{.*}} : f32 +!CHECK-LABEL: func.func @_QPf00 +!CHECK: %[[VAL_A:[0-9]+]]:2 = hlfir.declare %arg0 dummy_scope %0 +!CHECK: %[[VAL_X:[0-9]+]]:2 = hlfir.declare %arg1 dummy_scope %0 +!CHECK: %[[VAL_Y:[0-9]+]]:2 = hlfir.declare %arg2 dummy_scope %0 +!CHECK: %[[V4:[0-9]+]] = fir.load %[[VAL_X]]#0 : !fir.ref +!CHECK: %[[V5:[0-9]+]] = fir.load %[[VAL_X]]#0 : !fir.ref +!CHECK: %[[V6:[0-9]+]] = fir.is_present %[[VAL_Y]]#0 : (!fir.ref) -> i1 +!CHECK: %[[V7:[0-9]+]] = arith.cmpf ogt, %[[V4]], %[[V5]] fastmath : f32 +!CHECK: %[[V8:[0-9]+]] = arith.select %[[V7]], %[[V4]], %[[V5]] : f32 +!CHECK: %[[V9:[0-9]+]] = fir.if %[[V6]] -> (f32) { +!CHECK: %[[V10:[0-9]+]] = fir.load %[[VAL_Y]]#0 : !fir.ref +!CHECK: %[[V11:[0-9]+]] = arith.cmpf ogt, %[[V8]], %[[V10]] fastmath : f32 +!CHECK: %[[V12:[0-9]+]] = arith.select %[[V11]], %[[V8]], %[[V10]] : f32 +!CHECK: fir.result %[[V12]] : f32 !CHECK: } else { -!CHECK: fir.result {{.*}} : f32 +!CHECK: fir.result %[[V8]] : f32 !CHECK: } -!CHECK: omp.atomic.update {{.*}} : !fir.ref { +!CHECK: omp.atomic.update %[[VAL_A]]#0 : !fir.ref { !CHECK: ^bb0(%[[ARG:[a-z0-9]+]]: f32): -!CHECK: %[[V10:[a-z0-9]+]] = arith.cmpf ogt, %[[ARG]], %[[IF_VAL]] -!CHECK: %[[V11:[a-z0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[IF_VAL]] +!CHECK: %[[V10:[0-9]+]] = arith.cmpf ogt, %[[ARG]], %[[V9]] fastmath : f32 +!CHECK: %[[V11:[0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[V9]] : f32 !CHECK: omp.yield(%[[V11]] : f32) !CHECK: } @@ -22,3 +33,36 @@ subroutine f00(a, x, y) !$omp atomic update a = max(x, a, y) end + + +!CHECK-LABEL: func.func @_QPf01 +!CHECK: %[[VAL_A:[0-9]+]]:2 = hlfir.declare %arg0 dummy_scope %0 +!CHECK: %[[VAL_X:[0-9]+]]:2 = hlfir.declare %arg1 dummy_scope %0 +!CHECK: %[[VAL_Y:[0-9]+]]:2 = hlfir.declare %arg2 dummy_scope %0 +!CHECK: %[[V4:[0-9]+]] = fir.load %[[VAL_X]]#0 : !fir.ref +!CHECK: %[[V5:[0-9]+]] = fir.load 
%[[VAL_X]]#0 : !fir.ref +!CHECK: %[[V6:[0-9]+]] = fir.is_present %[[VAL_Y]]#0 : (!fir.ref) -> i1 +!CHECK: %[[V7:[0-9]+]] = arith.cmpi slt, %[[V4]], %[[V5]] : i32 +!CHECK: %[[V8:[0-9]+]] = arith.select %[[V7]], %[[V4]], %[[V5]] : i32 +!CHECK: %[[V9:[0-9]+]] = fir.if %[[V6]] -> (i32) { +!CHECK: %[[V10:[0-9]+]] = fir.load %[[VAL_Y]]#0 : !fir.ref +!CHECK: %[[V11:[0-9]+]] = arith.cmpi slt, %[[V8]], %[[V10]] : i32 +!CHECK: %[[V12:[0-9]+]] = arith.select %[[V11]], %[[V8]], %[[V10]] : i32 +!CHECK: fir.result %[[V12]] : i32 +!CHECK: } else { +!CHECK: fir.result %[[V8]] : i32 +!CHECK: } +!CHECK: omp.atomic.update %[[VAL_A]]#0 : !fir.ref { +!CHECK: ^bb0(%[[ARG:[a-z0-9]+]]: i32): +!CHECK: %[[V10:[0-9]+]] = arith.cmpi slt, %[[ARG]], %[[V9]] : i32 +!CHECK: %[[V11:[0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[V9]] : i32 +!CHECK: omp.yield(%[[V11]] : i32) +!CHECK: } + +subroutine f01(a, x, y) + integer :: a + integer, optional :: x, y + !$omp atomic update + a = min(x, a, y) +end + From flang-commits at lists.llvm.org Tue Jul 1 06:04:04 2025 From: flang-commits at lists.llvm.org (Krzysztof Parzyszek via flang-commits) Date: Tue, 01 Jul 2025 06:04:04 -0700 (PDT) Subject: [flang-commits] [flang] [flang][OpenMP] Rewrite min/max with more than 2 arguments (PR #146423) In-Reply-To: Message-ID: <6863dcc4.630a0220.1dacd2.a0c6@mx.google.com> ================ @@ -0,0 +1,24 @@ +!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 %s -o - | FileCheck %s + +! Check that the presence tests are done outside of the atomic update +! construct. 
+ +!CHECK: %[[IS_PRESENT:[a-z0-9]+]] = fir.is_present +!CHECK: %[[IF_VAL:[a-z0-9]+]] = fir.if %[[IS_PRESENT]] -> (f32) { +!CHECK: fir.result {{.*}} : f32 +!CHECK: } else { +!CHECK: fir.result {{.*}} : f32 +!CHECK: } +!CHECK: omp.atomic.update {{.*}} : !fir.ref { +!CHECK: ^bb0(%[[ARG:[a-z0-9]+]]: f32): +!CHECK: %[[V10:[a-z0-9]+]] = arith.cmpf ogt, %[[ARG]], %[[IF_VAL]] +!CHECK: %[[V11:[a-z0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[IF_VAL]] +!CHECK: omp.yield(%[[V11]] : f32) +!CHECK: } + +subroutine f00(a, x, y) + real :: a + real, optional :: x, y + !$omp atomic update + a = max(x, a, y) ---------------- kparzysz wrote: Done. https://github.com/llvm/llvm-project/pull/146423 From flang-commits at lists.llvm.org Tue Jul 1 06:04:04 2025 From: flang-commits at lists.llvm.org (Krzysztof Parzyszek via flang-commits) Date: Tue, 01 Jul 2025 06:04:04 -0700 (PDT) Subject: [flang-commits] [flang] [flang][OpenMP] Rewrite min/max with more than 2 arguments (PR #146423) In-Reply-To: Message-ID: <6863dcc4.170a0220.123bb4.6bc7@mx.google.com> ================ @@ -350,10 +603,28 @@ genAtomicUpdate(lower::AbstractConverter &converter, mlir::Type atomType = fir::unwrapRefType(atomAddr.getType()); // This must exist by now. - semantics::SomeExpr input = *evaluate::GetConvertInput(assign.rhs); - std::vector args = - evaluate::GetTopLevelOperation(input).second; + semantics::SomeExpr rhs = assign.rhs; + semantics::SomeExpr input = *evaluate::GetConvertInput(rhs); + auto [opcode, args] = evaluate::GetTopLevelOperation(input); assert(!args.empty() && "Update operation without arguments"); + + const semantics::SomeExpr *atomArg = [&]() { + for (const semantics::SomeExpr &e : args) { ---------------- kparzysz wrote: Fixed. 
https://github.com/llvm/llvm-project/pull/146423 From flang-commits at lists.llvm.org Tue Jul 1 06:04:04 2025 From: flang-commits at lists.llvm.org (Krzysztof Parzyszek via flang-commits) Date: Tue, 01 Jul 2025 06:04:04 -0700 (PDT) Subject: [flang-commits] [flang] [flang][OpenMP] Rewrite min/max with more than 2 arguments (PR #146423) In-Reply-To: Message-ID: <6863dcc4.170a0220.33ecb2.9c08@mx.google.com> ================ @@ -41,6 +43,178 @@ namespace omp { using namespace Fortran::lower::omp; } +namespace { +// An example of a type that can be used to get the return value from +// the visitor: +// visitor(type_identity) -> result_type +using SomeArgType = evaluate::Type; + +struct GetProc + : public evaluate::Traverse { + using Result = const evaluate::ProcedureDesignator *; + using Base = evaluate::Traverse; + GetProc() : Base(*this) {} + + using Base::operator(); + + static Result Default() { return nullptr; } + + Result operator()(const evaluate::ProcedureDesignator &p) const { return &p; } + static Result Combine(Result a, Result b) { return a != nullptr ? 
a : b; } +}; + +struct WithType { + WithType(const evaluate::DynamicType &t) : type(t) { + assert(type.category() != common::TypeCategory::Derived && + "Type cannot be a derived type"); + } + + template // + auto visit(VisitorTy &&visitor) const + -> std::invoke_result_t { + switch (type.category()) { + case common::TypeCategory::Integer: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Unsigned: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Real: + switch (type.kind()) { + case 2: + return visitor(llvm::type_identity>{}); + case 3: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 10: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Complex: + switch (type.kind()) { + case 2: + return visitor(llvm::type_identity>{}); + case 3: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 10: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Logical: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return 
visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Character: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Derived: + break; + } + llvm_unreachable("Unhandled type"); + } + + const evaluate::DynamicType &type; + +private: + // Shorter names. + static constexpr auto Character = common::TypeCategory::Character; + static constexpr auto Complex = common::TypeCategory::Complex; + static constexpr auto Derived = common::TypeCategory::Derived; ---------------- kparzysz wrote: Fixed. https://github.com/llvm/llvm-project/pull/146423 From flang-commits at lists.llvm.org Tue Jul 1 06:04:21 2025 From: flang-commits at lists.llvm.org (Krzysztof Parzyszek via flang-commits) Date: Tue, 01 Jul 2025 06:04:21 -0700 (PDT) Subject: [flang-commits] [flang] [flang][OpenMP] Rewrite min/max with more than 2 arguments (PR #146423) In-Reply-To: Message-ID: <6863dcd5.630a0220.13c6f5.9977@mx.google.com> ================ @@ -0,0 +1,24 @@ +!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 %s -o - | FileCheck %s + +! Check that the presence tests are done outside of the atomic update +! construct. + +!CHECK: %[[IS_PRESENT:[a-z0-9]+]] = fir.is_present +!CHECK: %[[IF_VAL:[a-z0-9]+]] = fir.if %[[IS_PRESENT]] -> (f32) { +!CHECK: fir.result {{.*}} : f32 ---------------- kparzysz wrote: Done. 
https://github.com/llvm/llvm-project/pull/146423 From flang-commits at lists.llvm.org Tue Jul 1 06:04:32 2025 From: flang-commits at lists.llvm.org (Krzysztof Parzyszek via flang-commits) Date: Tue, 01 Jul 2025 06:04:32 -0700 (PDT) Subject: [flang-commits] [flang] [flang][OpenMP] Rewrite min/max with more than 2 arguments (PR #146423) In-Reply-To: Message-ID: <6863dce0.050a0220.27550.1be3@mx.google.com> ================ @@ -237,6 +411,85 @@ makeMemOrderAttr(lower::AbstractConverter &converter, return nullptr; } +static bool replaceArgs(semantics::SomeExpr &expr, + evaluate::ActualArguments &&newArgs) { + return ArgumentReplacer(std::move(newArgs))(expr); +} + +static semantics::SomeExpr makeCall(const evaluate::DynamicType &type, + const evaluate::ProcedureDesignator &proc, + const evaluate::ActualArguments &args) { + return WithType(type).visit([&](auto &&s) -> semantics::SomeExpr { + using Type = typename llvm::remove_cvref_t::type; + return evaluate::AsGenericExpr( + evaluate::FunctionRef(AsRvalue(proc), AsRvalue(args))); + }); +} + +static const evaluate::ProcedureDesignator & +getProcedureDesignator(const semantics::SomeExpr &call) { + const evaluate::ProcedureDesignator *proc = GetProc{}(call); + assert(proc && "Call has no procedure designator"); + return *proc; +} + +static semantics::SomeExpr // +genReducedMinMax(const semantics::SomeExpr &orig, + const semantics::SomeExpr *atomArg, + const std::vector &args) { + // Take a list of arguments to a min/max operation, e.g. [a0, a1, ...] + // One of the a_i's, say a_t, must be atom (or a convert of atom). ---------------- kparzysz wrote: Fixed. 
https://github.com/llvm/llvm-project/pull/146423 From flang-commits at lists.llvm.org Tue Jul 1 06:06:44 2025 From: flang-commits at lists.llvm.org (Kiran Chandramohan via flang-commits) Date: Tue, 01 Jul 2025 06:06:44 -0700 (PDT) Subject: [flang-commits] [flang] [flang][OpenMP] Rewrite min/max with more than 2 arguments (PR #146423) In-Reply-To: Message-ID: <6863dd64.170a0220.20bd46.e8c5@mx.google.com> kiranchandramohan wrote: I have not gone through this in detail. But why is evaluate processing done during Lowering? Can this canonicalization be performed in the semantics stage? https://github.com/llvm/llvm-project/pull/146423 From flang-commits at lists.llvm.org Tue Jul 1 06:49:35 2025 From: flang-commits at lists.llvm.org (Krzysztof Parzyszek via flang-commits) Date: Tue, 01 Jul 2025 06:49:35 -0700 (PDT) Subject: [flang-commits] [flang] [flang][OpenMP] Rewrite min/max with more than 2 arguments (PR #146423) In-Reply-To: Message-ID: <6863e76f.170a0220.188e3a.ab91@mx.google.com> https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/146423 >From 4cfcf45d8250d648bd4ae0a9110b5034d2495149 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 20 Jun 2025 06:48:18 -0500 Subject: [PATCH 1/3] [flang][OpenMP] Rewrite min/max with more than 2 arguments Given an atomic operation `w = max(w, x1, x2, ...)` rewrite it as `w = max(w, max(x1, x2, ...))`. This will avoid unnecessary non-atomic comparisons inside of the atomic operation (min/max are expanded inline). In particular, if some of the x_i's are optional dummy parameters in the containing function, this will avoid any presence tests within the atomic operation. 
Fixes https://github.com/llvm/llvm-project/issues/144838 --- flang/lib/Lower/OpenMP/Atomic.cpp | 279 +++++++++++++++++- flang/test/Lower/OpenMP/atomic-update.f90 | 12 +- .../Lower/OpenMP/max-optional-parameters.f90 | 24 ++ 3 files changed, 302 insertions(+), 13 deletions(-) create mode 100644 flang/test/Lower/OpenMP/max-optional-parameters.f90 diff --git a/flang/lib/Lower/OpenMP/Atomic.cpp b/flang/lib/Lower/OpenMP/Atomic.cpp index 33a743f8f9dda..b7a223eff80c5 100644 --- a/flang/lib/Lower/OpenMP/Atomic.cpp +++ b/flang/lib/Lower/OpenMP/Atomic.cpp @@ -11,6 +11,8 @@ #include "flang/Evaluate/expression.h" #include "flang/Evaluate/fold.h" #include "flang/Evaluate/tools.h" +#include "flang/Evaluate/traverse.h" +#include "flang/Evaluate/type.h" #include "flang/Lower/AbstractConverter.h" #include "flang/Lower/PFTBuilder.h" #include "flang/Lower/StatementContext.h" @@ -41,6 +43,178 @@ namespace omp { using namespace Fortran::lower::omp; } +namespace { +// An example of a type that can be used to get the return value from +// the visitor: +// visitor(type_identity) -> result_type +using SomeArgType = evaluate::Type; + +struct GetProc + : public evaluate::Traverse { + using Result = const evaluate::ProcedureDesignator *; + using Base = evaluate::Traverse; + GetProc() : Base(*this) {} + + using Base::operator(); + + static Result Default() { return nullptr; } + + Result operator()(const evaluate::ProcedureDesignator &p) const { return &p; } + static Result Combine(Result a, Result b) { return a != nullptr ? 
a : b; } +}; + +struct WithType { + WithType(const evaluate::DynamicType &t) : type(t) { + assert(type.category() != common::TypeCategory::Derived && + "Type cannot be a derived type"); + } + + template // + auto visit(VisitorTy &&visitor) const + -> std::invoke_result_t { + switch (type.category()) { + case common::TypeCategory::Integer: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Unsigned: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Real: + switch (type.kind()) { + case 2: + return visitor(llvm::type_identity>{}); + case 3: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 10: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Complex: + switch (type.kind()) { + case 2: + return visitor(llvm::type_identity>{}); + case 3: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 10: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Logical: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return 
visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Character: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Derived: + break; + } + llvm_unreachable("Unhandled type"); + } + + const evaluate::DynamicType &type; + +private: + // Shorter names. + static constexpr auto Character = common::TypeCategory::Character; + static constexpr auto Complex = common::TypeCategory::Complex; + static constexpr auto Derived = common::TypeCategory::Derived; + static constexpr auto Integer = common::TypeCategory::Integer; + static constexpr auto Logical = common::TypeCategory::Logical; + static constexpr auto Real = common::TypeCategory::Real; + static constexpr auto Unsigned = common::TypeCategory::Unsigned; +}; + +template > +U AsRvalue(T &t) { + U copy{t}; + return std::move(copy); +} + +template +T &&AsRvalue(T &&t) { + return std::move(t); +} + +struct ArgumentReplacer + : public evaluate::Traverse { + using Base = evaluate::Traverse; + using Result = bool; + + Result Default() const { return false; } + + ArgumentReplacer(evaluate::ActualArguments &&newArgs) + : Base(*this), args_(std::move(newArgs)) {} + + using Base::operator(); + + template + Result operator()(const evaluate::FunctionRef &x) { + assert(!done_); + auto &mut = const_cast &>(x); + mut.arguments() = args_; + done_ = true; + return true; + } + + Result Combine(Result &&a, Result &&b) { return a || b; } + +private: + bool done_{false}; + evaluate::ActualArguments &&args_; +}; +} // namespace + [[maybe_unused]] static void dumpAtomicAnalysis(const parser::OpenMPAtomicConstruct::Analysis &analysis) { auto whatStr = [](int k) { @@ -237,6 +411,85 @@ makeMemOrderAttr(lower::AbstractConverter &converter, return nullptr; } +static bool replaceArgs(semantics::SomeExpr &expr, + evaluate::ActualArguments &&newArgs) { + return 
ArgumentReplacer(std::move(newArgs))(expr); +} + +static semantics::SomeExpr makeCall(const evaluate::DynamicType &type, + const evaluate::ProcedureDesignator &proc, + const evaluate::ActualArguments &args) { + return WithType(type).visit([&](auto &&s) -> semantics::SomeExpr { + using Type = typename llvm::remove_cvref_t::type; + return evaluate::AsGenericExpr( + evaluate::FunctionRef(AsRvalue(proc), AsRvalue(args))); + }); +} + +static const evaluate::ProcedureDesignator & +getProcedureDesignator(const semantics::SomeExpr &call) { + const evaluate::ProcedureDesignator *proc = GetProc{}(call); + assert(proc && "Call has no procedure designator"); + return *proc; +} + +static semantics::SomeExpr // +genReducedMinMax(const semantics::SomeExpr &orig, + const semantics::SomeExpr *atomArg, + const std::vector &args) { + // Take a list of arguments to a min/max operation, e.g. [a0, a1, ...] + // One of the a_i's, say a_t, must be atom (or a convert of atom). + // Generate tmp = min/max(a0, a1, ... [except a_t]). Then generate + // call = min/max(a_t, tmp). + // Return "call". + + // The min/max intrinsics have 2 mandatory arguments, the rest is optional. + // Make sure that the "tmp = min/max(...)" doesn't promote an optional + // argument to a non-optional position. This could happen if a_t is at + // position 0 or 1. + if (args.size() <= 2) + return orig; + + evaluate::ActualArguments nonAtoms; + + auto AsActual = [](const semantics::SomeExpr &x) { + semantics::SomeExpr copy = x; + return evaluate::ActualArgument(std::move(copy)); + }; + // Semantic checks guarantee that the "atom" shows exactly once in the + // argument list (with potential conversions around it). + // For the first two (non-optional) arguments, if "atom" is among them, + // replace it with another occurrence of the other non-optional argument. + if (atomArg == &args[0]) { + // (atom, x, y...) -> (x, x, y...) 
+ nonAtoms.push_back(AsActual(args[1])); + nonAtoms.push_back(AsActual(args[1])); + } else if (atomArg == &args[1]) { + // (x, atom, y...) -> (x, x, y...) + nonAtoms.push_back(AsActual(args[0])); + nonAtoms.push_back(AsActual(args[0])); + } else { + // (x, y, z...) -> unchanged + nonAtoms.push_back(AsActual(args[0])); + nonAtoms.push_back(AsActual(args[1])); + } + + // The rest of arguments are optional, so we can just skip "atom". + for (size_t i = 2, e = args.size(); i != e; ++i) { + if (atomArg != &args[i]) + nonAtoms.push_back(AsActual(args[i])); + } + + // The type of the intermediate min/max is the same as the type of its + // arguments, which may be different from the type of the original + // expression. The original expression may have additional converts. + auto tmp = + makeCall(*atomArg->GetType(), getProcedureDesignator(orig), nonAtoms); + semantics::SomeExpr call = orig; + replaceArgs(call, {AsActual(*atomArg), AsActual(tmp)}); + return call; +} + static mlir::Operation * // genAtomicRead(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, mlir::Location loc, @@ -350,10 +603,28 @@ genAtomicUpdate(lower::AbstractConverter &converter, mlir::Type atomType = fir::unwrapRefType(atomAddr.getType()); // This must exist by now. 
- semantics::SomeExpr input = *evaluate::GetConvertInput(assign.rhs); - std::vector args = - evaluate::GetTopLevelOperation(input).second; + semantics::SomeExpr rhs = assign.rhs; + semantics::SomeExpr input = *evaluate::GetConvertInput(rhs); + auto [opcode, args] = evaluate::GetTopLevelOperation(input); assert(!args.empty() && "Update operation without arguments"); + + const semantics::SomeExpr *atomArg = [&]() { + for (const semantics::SomeExpr &e : args) { + if (evaluate::IsSameOrConvertOf(e, atom)) + return &e; + } + llvm_unreachable("Atomic variable not in argument list"); + }(); + + if (opcode == evaluate::operation::Operator::Min || + opcode == evaluate::operation::Operator::Max) { + // Min and max operations are expanded inline, so reduce them to + // operations with exactly two (non-optional) arguments. + rhs = genReducedMinMax(rhs, atomArg, args); + input = *evaluate::GetConvertInput(rhs); + std::tie(opcode, args) = evaluate::GetTopLevelOperation(input); + atomArg = nullptr; // No longer valid. 
+ } for (auto &arg : args) { if (!evaluate::IsSameOrConvertOf(arg, atom)) { mlir::Value val = fir::getBase(converter.genExprValue(arg, naCtx, &loc)); @@ -372,7 +643,7 @@ genAtomicUpdate(lower::AbstractConverter &converter, converter.overrideExprValues(&overrides); mlir::Value updated = - fir::getBase(converter.genExprValue(assign.rhs, stmtCtx, &loc)); + fir::getBase(converter.genExprValue(rhs, stmtCtx, &loc)); mlir::Value converted = builder.createConvert(loc, atomType, updated); builder.create(loc, converted); converter.resetExprOverrides(); diff --git a/flang/test/Lower/OpenMP/atomic-update.f90 b/flang/test/Lower/OpenMP/atomic-update.f90 index 3f840acefa6e8..f88bbea6fca85 100644 --- a/flang/test/Lower/OpenMP/atomic-update.f90 +++ b/flang/test/Lower/OpenMP/atomic-update.f90 @@ -107,8 +107,6 @@ program OmpAtomicUpdate !CHECK: omp.atomic.update memory_order(relaxed) %[[VAL_Y_DECLARE]]#0 : !fir.ref { !CHECK: ^bb0(%[[ARG:.*]]: i32): !CHECK: {{.*}} = arith.cmpi sgt, %[[ARG]], {{.*}} : i32 -!CHECK: {{.*}} = arith.select {{.*}}, %[[ARG]], {{.*}} : i32 -!CHECK: {{.*}} = arith.cmpi sgt, {{.*}} !CHECK: %[[TEMP:.*]] = arith.select {{.*}} : i32 !CHECK: omp.yield(%[[TEMP]] : i32) !CHECK: } @@ -177,13 +175,9 @@ program OmpAtomicUpdate !CHECK: %[[VAL_Z_LOADED:.*]] = fir.load %[[VAL_Z_DECLARE]]#0 : !fir.ref !CHECK: omp.atomic.update %[[VAL_W_DECLARE]]#0 : !fir.ref { !CHECK: ^bb0(%[[ARG_W:.*]]: i32): -!CHECK: %[[WX_CMP:.*]] = arith.cmpi sgt, %[[ARG_W]], %[[VAL_X_LOADED]] : i32 -!CHECK: %[[WX_MIN:.*]] = arith.select %[[WX_CMP]], %[[ARG_W]], %[[VAL_X_LOADED]] : i32 -!CHECK: %[[WXY_CMP:.*]] = arith.cmpi sgt, %[[WX_MIN]], %[[VAL_Y_LOADED]] : i32 -!CHECK: %[[WXY_MIN:.*]] = arith.select %[[WXY_CMP]], %[[WX_MIN]], %[[VAL_Y_LOADED]] : i32 -!CHECK: %[[WXYZ_CMP:.*]] = arith.cmpi sgt, %[[WXY_MIN]], %[[VAL_Z_LOADED]] : i32 -!CHECK: %[[WXYZ_MIN:.*]] = arith.select %[[WXYZ_CMP]], %[[WXY_MIN]], %[[VAL_Z_LOADED]] : i32 -!CHECK: omp.yield(%[[WXYZ_MIN]] : i32) +!CHECK: %[[W_CMP:.*]] = arith.cmpi 
sgt, %[[ARG_W]], {{.*}} : i32 +!CHECK: %[[WXYZ_MAX:.*]] = arith.select %[[W_CMP]], %[[ARG_W]], {{.*}} : i32 +!CHECK: omp.yield(%[[WXYZ_MAX]] : i32) !CHECK: } !$omp atomic update w = max(w,x,y,z) diff --git a/flang/test/Lower/OpenMP/max-optional-parameters.f90 b/flang/test/Lower/OpenMP/max-optional-parameters.f90 new file mode 100644 index 0000000000000..2bb6b110cb0ef --- /dev/null +++ b/flang/test/Lower/OpenMP/max-optional-parameters.f90 @@ -0,0 +1,24 @@ +!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 %s -o - | FileCheck %s + +! Check that the presence tests are done outside of the atomic update +! construct. + +!CHECK: %[[IS_PRESENT:[a-z0-9]+]] = fir.is_present +!CHECK: %[[IF_VAL:[a-z0-9]+]] = fir.if %[[IS_PRESENT]] -> (f32) { +!CHECK: fir.result {{.*}} : f32 +!CHECK: } else { +!CHECK: fir.result {{.*}} : f32 +!CHECK: } +!CHECK: omp.atomic.update {{.*}} : !fir.ref { +!CHECK: ^bb0(%[[ARG:[a-z0-9]+]]: f32): +!CHECK: %[[V10:[a-z0-9]+]] = arith.cmpf ogt, %[[ARG]], %[[IF_VAL]] +!CHECK: %[[V11:[a-z0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[IF_VAL]] +!CHECK: omp.yield(%[[V11]] : f32) +!CHECK: } + +subroutine f00(a, x, y) + real :: a + real, optional :: x, y + !$omp atomic update + a = max(x, a, y) +end >From 4302ed81021028baec7427028562802a516ea511 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 1 Jul 2025 07:38:45 -0500 Subject: [PATCH 2/3] Address review comments --- flang/lib/Lower/OpenMP/Atomic.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/flang/lib/Lower/OpenMP/Atomic.cpp b/flang/lib/Lower/OpenMP/Atomic.cpp index b7a223eff80c5..2ab91b239a3cc 100644 --- a/flang/lib/Lower/OpenMP/Atomic.cpp +++ b/flang/lib/Lower/OpenMP/Atomic.cpp @@ -157,6 +157,7 @@ struct WithType { } break; case common::TypeCategory::Derived: + (void)Derived; break; } llvm_unreachable("Unhandled type"); @@ -438,7 +439,7 @@ genReducedMinMax(const semantics::SomeExpr &orig, const semantics::SomeExpr *atomArg, const std::vector &args) { // Take 
a list of arguments to a min/max operation, e.g. [a0, a1, ...] - // One of the a_i's, say a_t, must be atom (or a convert of atom). + // One of the a_i's, say a_t, must be atomArg. // Generate tmp = min/max(a0, a1, ... [except a_t]). Then generate // call = min/max(a_t, tmp). // Return "call". @@ -608,13 +609,14 @@ genAtomicUpdate(lower::AbstractConverter &converter, auto [opcode, args] = evaluate::GetTopLevelOperation(input); assert(!args.empty() && "Update operation without arguments"); - const semantics::SomeExpr *atomArg = [&]() { + // Pass args as an argument to avoid capturing a structured binding. + const semantics::SomeExpr *atomArg = [&](auto &args) { for (const semantics::SomeExpr &e : args) { if (evaluate::IsSameOrConvertOf(e, atom)) return &e; } llvm_unreachable("Atomic variable not in argument list"); - }(); + }(args); if (opcode == evaluate::operation::Operator::Min || opcode == evaluate::operation::Operator::Max) { >From 16df2e0eb775fce156db6f5fc62509026a0b2bc5 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 1 Jul 2025 08:01:23 -0500 Subject: [PATCH 3/3] Check for non-atomic operations, add min --- .../Lower/OpenMP/max-optional-parameters.f90 | 58 ++++++++++++++++--- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/flang/test/Lower/OpenMP/max-optional-parameters.f90 b/flang/test/Lower/OpenMP/max-optional-parameters.f90 index 2bb6b110cb0ef..57a432eeeecec 100644 --- a/flang/test/Lower/OpenMP/max-optional-parameters.f90 +++ b/flang/test/Lower/OpenMP/max-optional-parameters.f90 @@ -3,16 +3,27 @@ ! Check that the presence tests are done outside of the atomic update ! construct. 
-!CHECK: %[[IS_PRESENT:[a-z0-9]+]] = fir.is_present -!CHECK: %[[IF_VAL:[a-z0-9]+]] = fir.if %[[IS_PRESENT]] -> (f32) { -!CHECK: fir.result {{.*}} : f32 +!CHECK-LABEL: func.func @_QPf00 +!CHECK: %[[VAL_A:[0-9]+]]:2 = hlfir.declare %arg0 dummy_scope %0 +!CHECK: %[[VAL_X:[0-9]+]]:2 = hlfir.declare %arg1 dummy_scope %0 +!CHECK: %[[VAL_Y:[0-9]+]]:2 = hlfir.declare %arg2 dummy_scope %0 +!CHECK: %[[V4:[0-9]+]] = fir.load %[[VAL_X]]#0 : !fir.ref +!CHECK: %[[V5:[0-9]+]] = fir.load %[[VAL_X]]#0 : !fir.ref +!CHECK: %[[V6:[0-9]+]] = fir.is_present %[[VAL_Y]]#0 : (!fir.ref) -> i1 +!CHECK: %[[V7:[0-9]+]] = arith.cmpf ogt, %[[V4]], %[[V5]] fastmath : f32 +!CHECK: %[[V8:[0-9]+]] = arith.select %[[V7]], %[[V4]], %[[V5]] : f32 +!CHECK: %[[V9:[0-9]+]] = fir.if %[[V6]] -> (f32) { +!CHECK: %[[V10:[0-9]+]] = fir.load %[[VAL_Y]]#0 : !fir.ref +!CHECK: %[[V11:[0-9]+]] = arith.cmpf ogt, %[[V8]], %[[V10]] fastmath : f32 +!CHECK: %[[V12:[0-9]+]] = arith.select %[[V11]], %[[V8]], %[[V10]] : f32 +!CHECK: fir.result %[[V12]] : f32 !CHECK: } else { -!CHECK: fir.result {{.*}} : f32 +!CHECK: fir.result %[[V8]] : f32 !CHECK: } -!CHECK: omp.atomic.update {{.*}} : !fir.ref { +!CHECK: omp.atomic.update %[[VAL_A]]#0 : !fir.ref { !CHECK: ^bb0(%[[ARG:[a-z0-9]+]]: f32): -!CHECK: %[[V10:[a-z0-9]+]] = arith.cmpf ogt, %[[ARG]], %[[IF_VAL]] -!CHECK: %[[V11:[a-z0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[IF_VAL]] +!CHECK: %[[V10:[0-9]+]] = arith.cmpf ogt, %[[ARG]], %[[V9]] fastmath : f32 +!CHECK: %[[V11:[0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[V9]] : f32 !CHECK: omp.yield(%[[V11]] : f32) !CHECK: } @@ -22,3 +33,36 @@ subroutine f00(a, x, y) !$omp atomic update a = max(x, a, y) end + + +!CHECK-LABEL: func.func @_QPf01 +!CHECK: %[[VAL_A:[0-9]+]]:2 = hlfir.declare %arg0 dummy_scope %0 +!CHECK: %[[VAL_X:[0-9]+]]:2 = hlfir.declare %arg1 dummy_scope %0 +!CHECK: %[[VAL_Y:[0-9]+]]:2 = hlfir.declare %arg2 dummy_scope %0 +!CHECK: %[[V4:[0-9]+]] = fir.load %[[VAL_X]]#0 : !fir.ref +!CHECK: %[[V5:[0-9]+]] = fir.load 
%[[VAL_X]]#0 : !fir.ref +!CHECK: %[[V6:[0-9]+]] = fir.is_present %[[VAL_Y]]#0 : (!fir.ref) -> i1 +!CHECK: %[[V7:[0-9]+]] = arith.cmpi slt, %[[V4]], %[[V5]] : i32 +!CHECK: %[[V8:[0-9]+]] = arith.select %[[V7]], %[[V4]], %[[V5]] : i32 +!CHECK: %[[V9:[0-9]+]] = fir.if %[[V6]] -> (i32) { +!CHECK: %[[V10:[0-9]+]] = fir.load %[[VAL_Y]]#0 : !fir.ref +!CHECK: %[[V11:[0-9]+]] = arith.cmpi slt, %[[V8]], %[[V10]] : i32 +!CHECK: %[[V12:[0-9]+]] = arith.select %[[V11]], %[[V8]], %[[V10]] : i32 +!CHECK: fir.result %[[V12]] : i32 +!CHECK: } else { +!CHECK: fir.result %[[V8]] : i32 +!CHECK: } +!CHECK: omp.atomic.update %[[VAL_A]]#0 : !fir.ref { +!CHECK: ^bb0(%[[ARG:[a-z0-9]+]]: i32): +!CHECK: %[[V10:[0-9]+]] = arith.cmpi slt, %[[ARG]], %[[V9]] : i32 +!CHECK: %[[V11:[0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[V9]] : i32 +!CHECK: omp.yield(%[[V11]] : i32) +!CHECK: } + +subroutine f01(a, x, y) + integer :: a + integer, optional :: x, y + !$omp atomic update + a = min(x, a, y) +end + From flang-commits at lists.llvm.org Tue Jul 1 06:52:05 2025 From: flang-commits at lists.llvm.org (Tarun Prabhu via flang-commits) Date: Tue, 01 Jul 2025 06:52:05 -0700 (PDT) Subject: [flang-commits] [clang] [flang] [Flang][AArch64][VecLib] Add libmvec support for Flang/AArch64 (PR #146453) In-Reply-To: Message-ID: <6863e805.170a0220.300898.be3b@mx.google.com> ================ @@ -3487,7 +3487,8 @@ def fno_experimental_isel : Flag<["-"], "fno-experimental-isel">, Group; def fveclib : Joined<["-"], "fveclib=">, Group, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, - HelpText<"Use the given vector functions library">, + HelpText<"Use the given vector functions library.\n" + " Note: -fveclib=libmvec on AArch64 requires GLIBC 2.40 or newer.">, ---------------- tarunprabhu wrote: Is the constraint on `glibc` >=2.40 true for `clang` as well? What happens if the user attempts to build with an older `glibc`? Will they see a configure-time error or will the build fail? 
https://github.com/llvm/llvm-project/pull/146453 From flang-commits at lists.llvm.org Tue Jul 1 06:54:26 2025 From: flang-commits at lists.llvm.org (Tarun Prabhu via flang-commits) Date: Tue, 01 Jul 2025 06:54:26 -0700 (PDT) Subject: [flang-commits] [clang] [flang] [Flang][AArch64][VecLib] Add libmvec support for Flang/AArch64 (PR #146453) In-Reply-To: Message-ID: <6863e892.170a0220.10297b.b895@mx.google.com> ================ @@ -3487,7 +3487,8 @@ def fno_experimental_isel : Flag<["-"], "fno-experimental-isel">, Group; def fveclib : Joined<["-"], "fveclib=">, Group, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, - HelpText<"Use the given vector functions library">, + HelpText<"Use the given vector functions library.\n" + " Note: -fveclib=libmvec on AArch64 requires GLIBC 2.40 or newer.">, ---------------- tarunprabhu wrote: Ach. Please ignore the question about the constraint on glibc. The second question about failure modes with older glibc still holds though. https://github.com/llvm/llvm-project/pull/146453 From flang-commits at lists.llvm.org Tue Jul 1 06:55:56 2025 From: flang-commits at lists.llvm.org (Krzysztof Parzyszek via flang-commits) Date: Tue, 01 Jul 2025 06:55:56 -0700 (PDT) Subject: [flang-commits] [flang] [flang][OpenMP] Split check-omp-structure.cpp into smaller files, NFC (PR #146359) In-Reply-To: Message-ID: <6863e8ec.170a0220.38aae.b5cd@mx.google.com> kparzysz wrote: I'm not sure about the compile time. I'm guessing that with PCH enabled this shouldn't have any significant impact, but I'll try to see if I can get some measurements. https://github.com/llvm/llvm-project/pull/146359 From flang-commits at lists.llvm.org Tue Jul 1 06:58:52 2025 From: flang-commits at lists.llvm.org (via flang-commits) Date: Tue, 01 Jul 2025 06:58:52 -0700 (PDT) Subject: [flang-commits] [flang] [llvm] [mlir] [flang][debug] Generate DISubprogramAttr for omp::TargetOp. 
(PR #146532) In-Reply-To: Message-ID: <6863e99c.170a0220.2bcad7.0f4c@mx.google.com> llvmbot wrote: @llvm/pr-subscribers-flang-fir-hlfir @llvm/pr-subscribers-mlir Author: Abid Qadeer (abidh)
Changes This is a combination of https://github.com/llvm/llvm-project/pull/138149 and https://github.com/llvm/llvm-project/pull/138039, which were opened separately for ease of reviewing. The only other change is adjustments in 2 tests which have gone in since. There are `DeclareOp`s present for the variables mapped into the target region. That allows us to generate debug information for them. But the `TargetOp` is still part of the parent function and those variables get the parent function's `DISubprogram` as a scope. In `OMPIRBuilder`, a new function is created for the `TargetOp`. We also create a new `DISubprogram` for it. All the variables that were in the target region now have to be updated to have the correct scope. This after-the-fact updating of debug information becomes very difficult in certain cases. Take the example of variable-size arrays. The type of those arrays depends on the artificial `DILocalVariable`(s) which hold the size(s) of the array. This new function will now require that we generate new variables and new types. A similar issue exists for character type variables too. To avoid this after-the-fact updating, this PR generates a `DISubprogramAttr` for the `TargetOp` while generating the debug info in `flang`. Then we don't need to generate a `DISubprogram` in `OMPIRBuilder`. This change is made a bit more complicated by the fact that in the new scheme, the debug location already points to the new `DISubprogram` by the time it reaches `convertOmpTarget`. But we need some code generation in the parent function, so we have to carefully manage the debug locations. This fixes issue `#134991`.
--- Patch is 35.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146532.diff 14 Files Affected: - (modified) flang/lib/Optimizer/Transforms/AddDebugInfo.cpp (+116-1) - (added) flang/test/Transforms/debug-omp-target-op-1.fir (+40) - (added) flang/test/Transforms/debug-omp-target-op-2.fir (+53) - (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+8-38) - (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+21) - (added) mlir/test/Target/LLVMIR/omptarget-debug-empty.mlir (+27) - (modified) mlir/test/Target/LLVMIR/omptarget-debug-loop-loc.mlir (+3-1) - (modified) mlir/test/Target/LLVMIR/omptarget-debug-map-link-loc.mlir (+5-1) - (modified) mlir/test/Target/LLVMIR/omptarget-debug-nowait.mlir (+4-1) - (modified) mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir (+8-5) - (modified) mlir/test/Target/LLVMIR/omptarget-debug-var-2.mlir (+8-5) - (modified) mlir/test/Target/LLVMIR/omptarget-debug.mlir (+6-2) - (modified) mlir/test/Target/LLVMIR/omptarget-debug2.mlir (+6-2) - (modified) mlir/test/Target/LLVMIR/omptarget-parallel-llvm-debug.mlir (+6-2) ``````````diff diff --git a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp index 8fa2f38818c02..6eb914e67fd54 100644 --- a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp +++ b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp @@ -35,6 +35,7 @@ #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" @@ -104,6 +105,37 @@ bool debugInfoIsAlreadySet(mlir::Location loc) { return false; } +// Generates the name for the artificial DISubprogram that we are going to +// generate for omp::TargetOp. Its logic is borrowed from +// getTargetEntryUniqueInfo and +// TargetRegionEntryInfo::getTargetRegionEntryFnName to generate the same name. 
+// But even if there was a slight mismatch, it is not a problem because this +// name is artificial and not important to debug experience. +mlir::StringAttr getTargetFunctionName(mlir::MLIRContext *context, + mlir::Location Loc, + llvm::StringRef parentName) { + auto fileLoc = Loc->findInstanceOf(); + + assert(fileLoc && "No file found from location"); + llvm::StringRef fileName = fileLoc.getFilename().getValue(); + + llvm::sys::fs::UniqueID id; + uint64_t line = fileLoc.getLine(); + size_t fileId; + size_t deviceId; + if (auto ec = llvm::sys::fs::getUniqueID(fileName, id)) { + fileId = llvm::hash_value(fileName.str()); + deviceId = 0xdeadf17e; + } else { + fileId = id.getFile(); + deviceId = id.getDevice(); + } + return mlir::StringAttr::get( + context, + std::string(llvm::formatv("__omp_offloading_{0:x-}_{1:x-}_{2}_l{3}", + deviceId, fileId, parentName, line))); +} + } // namespace bool AddDebugInfoPass::createCommonBlockGlobal( @@ -446,6 +478,79 @@ void AddDebugInfoPass::handleFuncOp(mlir::func::FuncOp funcOp, line - 1, false); } + auto addTargetOpDISP = [&](bool lineTableOnly, + llvm::ArrayRef entities) { + // When we process the DeclareOp inside the OpenMP target region, all the + // variables get the DISubprogram of the parent function of the target op as + // the scope. In the codegen (to llvm ir), OpenMP target op results in the + // creation of a separate function. As the variables in the debug info have + // the DISubprogram of the parent function as the scope, the variables + // need to be updated at codegen time to avoid verification failures. + + // This updating after the fact becomes more and more difficult when types + // are dependent on local variables like in the case of variable size arrays + // or string. We not only have to generate new variables but also new types. 
+ // We can avoid this problem by generating a DISubprogramAttr here for the + // target op and make sure that all the variables inside the target region + // get the correct scope in the first place. + funcOp.walk([&](mlir::omp::TargetOp targetOp) { + unsigned line = getLineFromLoc(targetOp.getLoc()); + mlir::StringAttr name = + getTargetFunctionName(context, targetOp.getLoc(), funcOp.getName()); + mlir::LLVM::DISubprogramFlags flags = + mlir::LLVM::DISubprogramFlags::Definition | + mlir::LLVM::DISubprogramFlags::LocalToUnit; + if (isOptimized) + flags = flags | mlir::LLVM::DISubprogramFlags::Optimized; + + mlir::DistinctAttr id = + mlir::DistinctAttr::create(mlir::UnitAttr::get(context)); + llvm::SmallVector types; + types.push_back(mlir::LLVM::DINullTypeAttr::get(context)); + for (auto arg : targetOp.getRegion().getArguments()) { + auto tyAttr = typeGen.convertType(fir::unwrapRefType(arg.getType()), + fileAttr, cuAttr, /*declOp=*/nullptr); + types.push_back(tyAttr); + } + CC = llvm::dwarf::getCallingConvention("DW_CC_normal"); + mlir::LLVM::DISubroutineTypeAttr spTy = + mlir::LLVM::DISubroutineTypeAttr::get(context, CC, types); + if (lineTableOnly) { + auto spAttr = mlir::LLVM::DISubprogramAttr::get( + context, id, compilationUnit, Scope, name, name, funcFileAttr, line, + line, flags, spTy, /*retainedNodes=*/{}, /*annotations=*/{}); + targetOp->setLoc(builder.getFusedLoc({targetOp.getLoc()}, spAttr)); + return; + } + mlir::DistinctAttr recId = + mlir::DistinctAttr::create(mlir::UnitAttr::get(context)); + auto spAttr = mlir::LLVM::DISubprogramAttr::get( + context, recId, /*isRecSelf=*/true, id, compilationUnit, Scope, name, + name, funcFileAttr, line, line, flags, spTy, /*retainedNodes=*/{}, + /*annotations=*/{}); + + // Make sure that information about the imported modules is copied in the + // new function. 
+ llvm::SmallVector opEntities; + for (mlir::LLVM::DINodeAttr N : entities) { + if (auto entity = mlir::dyn_cast(N)) { + auto importedEntity = mlir::LLVM::DIImportedEntityAttr::get( + context, llvm::dwarf::DW_TAG_imported_module, spAttr, + entity.getEntity(), fileAttr, /*line=*/1, /*name=*/nullptr, + /*elements*/ {}); + opEntities.push_back(importedEntity); + } + } + + id = mlir::DistinctAttr::create(mlir::UnitAttr::get(context)); + spAttr = mlir::LLVM::DISubprogramAttr::get( + context, recId, /*isRecSelf=*/false, id, compilationUnit, Scope, name, + name, funcFileAttr, line, line, flags, spTy, opEntities, + /*annotations=*/{}); + targetOp->setLoc(builder.getFusedLoc({targetOp.getLoc()}, spAttr)); + }); + }; + // Don't process variables if user asked for line tables only. if (debugLevel == mlir::LLVM::DIEmissionKind::LineTablesOnly) { auto spAttr = mlir::LLVM::DISubprogramAttr::get( @@ -453,6 +558,7 @@ void AddDebugInfoPass::handleFuncOp(mlir::func::FuncOp funcOp, line, line, subprogramFlags, subTypeAttr, /*retainedNodes=*/{}, /*annotations=*/{}); funcOp->setLoc(builder.getFusedLoc({l}, spAttr)); + addTargetOpDISP(/*lineTableOnly=*/true, /*entities=*/{}); return; } @@ -510,9 +616,18 @@ void AddDebugInfoPass::handleFuncOp(mlir::func::FuncOp funcOp, funcName, fullName, funcFileAttr, line, line, subprogramFlags, subTypeAttr, entities, /*annotations=*/{}); funcOp->setLoc(builder.getFusedLoc({l}, spAttr)); + addTargetOpDISP(/*lineTableOnly=*/false, entities); funcOp.walk([&](fir::cg::XDeclareOp declOp) { - handleDeclareOp(declOp, fileAttr, spAttr, typeGen, symbolTable); + mlir::LLVM::DISubprogramAttr spTy = spAttr; + if (auto tOp = declOp->getParentOfType()) { + if (auto fusedLoc = llvm::dyn_cast(tOp.getLoc())) { + if (auto sp = llvm::dyn_cast( + fusedLoc.getMetadata())) + spTy = sp; + } + } + handleDeclareOp(declOp, fileAttr, spTy, typeGen, symbolTable); }); // commonBlockMap ensures that we don't create multiple DICommonBlockAttr of // the same name in one function. 
But it is ok (rather required) to create diff --git a/flang/test/Transforms/debug-omp-target-op-1.fir b/flang/test/Transforms/debug-omp-target-op-1.fir new file mode 100644 index 0000000000000..6b895b732c42b --- /dev/null +++ b/flang/test/Transforms/debug-omp-target-op-1.fir @@ -0,0 +1,40 @@ +// RUN: fir-opt --add-debug-info --mlir-print-debuginfo %s | FileCheck %s +// RUN: fir-opt --add-debug-info="debug-level=LineTablesOnly" --mlir-print-debuginfo %s | FileCheck %s --check-prefix=LINETABLE + +module attributes {dlti.dl_spec = #dlti.dl_spec<>} { + func.func @_QQmain() attributes {fir.bindc_name = "test"} { + %c13_i32 = arith.constant 13 : i32 + %c12_i32 = arith.constant 12 : i32 + %c6_i32 = arith.constant 6 : i32 + %c1_i32 = arith.constant 1 : i32 + %c5_i32 = arith.constant 5 : i32 + %0 = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"} loc(#loc1) + %1 = fircg.ext_declare %0 {uniq_name = "_QFEx"} : (!fir.ref) -> !fir.ref loc(#loc1) + %2 = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"} loc(#loc2) + %3 = fircg.ext_declare %2 {uniq_name = "_QFEy"} : (!fir.ref) -> !fir.ref loc(#loc2) + %4 = omp.map.info var_ptr(%1 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "x"} + %5 = omp.map.info var_ptr(%3 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "y"} + omp.target map_entries(%4 -> %arg0, %5 -> %arg1 : !fir.ref, !fir.ref) { + %16 = fircg.ext_declare %arg0 {uniq_name = "_QFEx"} : (!fir.ref) -> !fir.ref loc(#loc3) + %17 = fircg.ext_declare %arg1 {uniq_name = "_QFEy"} : (!fir.ref) -> !fir.ref loc(#loc4) + omp.terminator + } loc(#loc5) + return + } +} +#loc1 = loc("test.f90":1:1) +#loc2 = loc("test.f90":3:1) +#loc3 = loc("test.f90":7:1) +#loc4 = loc("test.f90":8:1) +#loc5 = loc("test.f90":6:1) + +// CHECK: #[[SP:.*]] = #llvm.di_subprogram<{{.*}}name = "test"{{.*}}> +// CHECK: #[[SP1:.*]] = #llvm.di_subprogram<{{.*}}name = "__omp_offloading_{{.*}}_QQmain_l6"{{.*}}line = 6{{.*}}subprogramFlags = 
"LocalToUnit|Definition"{{.*}}> +// CHECK: #llvm.di_local_variable +// CHECK: #llvm.di_local_variable +// CHECK: #llvm.di_local_variable +// CHECK: #llvm.di_local_variable + +// LINETABLE: #[[SP:.*]] = #llvm.di_subprogram<{{.*}}name = "test"{{.*}}> +// LINETABLE: #[[SP1:.*]] = #llvm.di_subprogram<{{.*}}name = "__omp_offloading_{{.*}}_QQmain_l6"{{.*}}line = 6{{.*}}subprogramFlags = "LocalToUnit|Definition"{{.*}}> +// LINETABLE-NOT: #llvm.di_local_variable diff --git a/flang/test/Transforms/debug-omp-target-op-2.fir b/flang/test/Transforms/debug-omp-target-op-2.fir new file mode 100644 index 0000000000000..15dcf2389b21d --- /dev/null +++ b/flang/test/Transforms/debug-omp-target-op-2.fir @@ -0,0 +1,53 @@ +// RUN: fir-opt --add-debug-info --mlir-print-debuginfo %s | FileCheck %s + +module attributes {dlti.dl_spec = #dlti.dl_spec<>} { + func.func @fn_(%arg0: !fir.ref> {fir.bindc_name = "b"}, %arg1: !fir.ref {fir.bindc_name = "c"}, %arg2: !fir.ref {fir.bindc_name = "d"}) { + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %0 = fir.alloca i32 + %1 = fir.alloca i32 + %2 = fir.undefined !fir.dscope + %3 = fircg.ext_declare %arg1 dummy_scope %2 {uniq_name = "_QFfnEc"} : (!fir.ref, !fir.dscope) -> !fir.ref loc(#loc2) + %4 = fircg.ext_declare %arg2 dummy_scope %2 {uniq_name = "_QFfnEd"} : (!fir.ref, !fir.dscope) -> !fir.ref loc(#loc3) + %5 = fir.load %3 : !fir.ref + %6 = fir.convert %5 : (i32) -> index + %9 = fir.load %4 : !fir.ref + %10 = fir.convert %9 : (i32) -> index + %15 = fircg.ext_declare %arg0(%6, %10) dummy_scope %2 {uniq_name = "_QFfnEb"} : (!fir.ref>, index, index, !fir.dscope) -> !fir.ref> loc(#loc4) + %16 = fircg.ext_embox %15(%6, %10) : (!fir.ref>, index, index) -> !fir.box> + %17:3 = fir.box_dims %16, %c0 : (!fir.box>, index) -> (index, index, index) + %18 = arith.subi %17#1, %c1 : index + %19 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%18 : index) extent(%17#1 : index) stride(%17#2 : index) start_idx(%c1 : index) {stride_in_bytes = 
true} + %20 = arith.muli %17#2, %17#1 : index + %21:3 = fir.box_dims %16, %c1 : (!fir.box>, index) -> (index, index, index) + %22 = arith.subi %21#1, %c1 : index + %23 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%22 : index) extent(%21#1 : index) stride(%20 : index) start_idx(%c1 : index) {stride_in_bytes = true} + %24 = omp.map.info var_ptr(%15 : !fir.ref>, i32) map_clauses(tofrom) capture(ByRef) bounds(%19, %23) -> !fir.ref> {name = "b"} + %25 = omp.map.info var_ptr(%1 : !fir.ref, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref {name = ""} + %26 = omp.map.info var_ptr(%0 : !fir.ref, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref {name = ""} + omp.target map_entries(%24 -> %arg3, %25 -> %arg4, %26 -> %arg5 : !fir.ref>, !fir.ref, !fir.ref) { + %27 = fir.load %arg5 : !fir.ref + %28 = fir.load %arg4 : !fir.ref + %29 = fir.convert %27 : (i32) -> index + %31 = fir.convert %28 : (i32) -> index + %37 = fircg.ext_declare %arg3(%29, %31) {uniq_name = "_QFfnEb"} : (!fir.ref>, index, index) -> !fir.ref> loc(#loc5) + omp.terminator + } loc(#loc6) + return + } loc(#loc7) +} +#loc1 = loc("test.f90":1:1) +#loc2 = loc("test.f90":3:1) +#loc3 = loc("test.f90":7:1) +#loc4 = loc("test.f90":8:1) +#loc5 = loc("test.f90":6:1) +#loc6 = loc("test.f90":16:1) +#loc7 = loc("test.f90":26:1) + + +// Test that variable size arrays inside target regions get their own +// compiler generated variables for size. 
+ +// CHECK: #[[SP:.*]] = #llvm.di_subprogram<{{.*}}name = "__omp_offloading_{{.*}}_fn__l16"{{.*}}> +// CHECK: #llvm.di_local_variable +// CHECK: #llvm.di_local_variable diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 85451b1233f96..db792a3b52d24 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -6891,23 +6891,19 @@ static void FixupDebugInfoForOutlinedFunction( if (!NewSP) return; - DenseMap Cache; SmallDenseMap RemappedVariables; auto GetUpdatedDIVariable = [&](DILocalVariable *OldVar, unsigned arg) { - auto NewSP = Func->getSubprogram(); DILocalVariable *&NewVar = RemappedVariables[OldVar]; // Only use cached variable if the arg number matches. This is important // so that DIVariable created for privatized variables are not discarded. if (NewVar && (arg == NewVar->getArg())) return NewVar; - DILocalScope *NewScope = DILocalScope::cloneScopeForSubprogram( - *OldVar->getScope(), *NewSP, Builder.getContext(), Cache); NewVar = llvm::DILocalVariable::get( - Builder.getContext(), NewScope, OldVar->getName(), OldVar->getFile(), - OldVar->getLine(), OldVar->getType(), arg, OldVar->getFlags(), - OldVar->getAlignInBits(), OldVar->getAnnotations()); + Builder.getContext(), OldVar->getScope(), OldVar->getName(), + OldVar->getFile(), OldVar->getLine(), OldVar->getType(), arg, + OldVar->getFlags(), OldVar->getAlignInBits(), OldVar->getAnnotations()); return NewVar; }; @@ -6921,7 +6917,8 @@ static void FixupDebugInfoForOutlinedFunction( ArgNo = std::get<1>(Iter->second) + 1; } } - DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo)); + if (ArgNo != 0) + DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo)); }; // The location and scope of variable intrinsics and records still point to @@ -7000,36 +6997,9 @@ static Expected createOutlinedFunction( // Save insert point. 
IRBuilder<>::InsertPointGuard IPG(Builder); - // If there's a DISubprogram associated with current function, then - // generate one for the outlined function. - if (Function *ParentFunc = BB->getParent()) { - if (DISubprogram *SP = ParentFunc->getSubprogram()) { - DICompileUnit *CU = SP->getUnit(); - DIBuilder DB(*M, true, CU); - DebugLoc DL = Builder.getCurrentDebugLocation(); - if (DL) { - // TODO: We are using nullopt for arguments at the moment. This will - // need to be updated when debug data is being generated for variables. - DISubroutineType *Ty = - DB.createSubroutineType(DB.getOrCreateTypeArray({})); - DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagDefinition | - DISubprogram::SPFlagOptimized | - DISubprogram::SPFlagLocalToUnit; - - DISubprogram *OutlinedSP = DB.createFunction( - CU, FuncName, FuncName, SP->getFile(), DL.getLine(), Ty, - DL.getLine(), DINode::DIFlags::FlagArtificial, SPFlags); - - // Attach subprogram to the function. - Func->setSubprogram(OutlinedSP); - // Update the CurrentDebugLocation in the builder so that right scope - // is used for things inside outlined function. - Builder.SetCurrentDebugLocation( - DILocation::get(Func->getContext(), DL.getLine(), DL.getCol(), - OutlinedSP, DL.getInlinedAt())); - } - } - } + // We will generate the entries in the outlined function but the debug + // location may still be pointing to the parent function. Reset it now. + Builder.SetCurrentDebugLocation(llvm::DebugLoc()); // Generate the region into the function. 
BasicBlock *EntryBB = BasicBlock::Create(Builder.getContext(), "entry", Func); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 3806db3ceab25..c1e1fec3ddef8 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -5324,9 +5324,27 @@ static LogicalResult convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { auto targetOp = cast(opInst); + // The current debug location already has the DISubprogram for the outlined + // function that will be created for the target op. We save it here so that + // we can set it on the outlined function. + llvm::DebugLoc outlinedFnLoc = builder.getCurrentDebugLocation(); if (failed(checkImplementationStatus(opInst))) return failure(); + // During the handling of target op, we will generate instructions in the + // parent function like call to the oulined function or branch to a new + // BasicBlock. We set the debug location here to parent function so that those + // get the correct debug locations. For outlined functions, the normal MLIR op + // conversion will automatically pick the correct location. 
+ llvm::BasicBlock *parentBB = builder.GetInsertBlock(); + assert(parentBB && "No insert block is set for the builder"); + llvm::Function *parentLLVMFn = parentBB->getParent(); + assert(parentLLVMFn && "Parent Function must be valid"); + if (llvm::DISubprogram *SP = parentLLVMFn->getSubprogram()) + builder.SetCurrentDebugLocation(llvm::DILocation::get( + parentLLVMFn->getContext(), outlinedFnLoc.getLine(), + outlinedFnLoc.getCol(), SP, outlinedFnLoc.getInlinedAt())); + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); bool isTargetDevice = ompBuilder->Config.isTargetDevice(); bool isGPU = ompBuilder->Config.isGPU(); @@ -5420,6 +5438,9 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, assert(llvmParentFn && llvmOutlinedFn && "Both parent and outlined functions must exist at this point"); + if (outlinedFnLoc && llvmParentFn->getSubprogram()) + llvmOutlinedFn->setSubprogram(outlinedFnLoc->getScope()->getSubprogram()); + if (auto attr = llvmParentFn->getFnAttribute("target-cpu"); attr.isStringAttribute()) llvmOutlinedFn->addFnAttr(attr); diff --git a/mlir/test/Target/LLVMIR/omptarget-debug-empty.mlir b/mlir/test/Target/LLVMIR/omptarget-debug-empty.mlir new file mode 100644 index 0000000000000..45e5d2612e2c2 ---... [truncated] ``````````
https://github.com/llvm/llvm-project/pull/146532 From flang-commits at lists.llvm.org Tue Jul 1 06:58:52 2025 From: flang-commits at lists.llvm.org (via flang-commits) Date: Tue, 01 Jul 2025 06:58:52 -0700 (PDT) Subject: [flang-commits] [flang] [llvm] [mlir] [flang][debug] Generate DISubprogramAttr for omp::TargetOp. (PR #146532) In-Reply-To: Message-ID: <6863e99c.050a0220.1eae93.49cf@mx.google.com> llvmbot wrote: @llvm/pr-subscribers-mlir-llvm Author: Abid Qadeer (abidh)
Changes This is a combination of https://github.com/llvm/llvm-project/pull/138149 and https://github.com/llvm/llvm-project/pull/138039, which were opened separately for ease of reviewing. The only other change is adjustments in 2 tests which have gone in since. There are `DeclareOp`s present for the variables mapped into the target region. That allows us to generate debug information for them. But the `TargetOp` is still part of the parent function and those variables get the parent function's `DISubprogram` as a scope. In `OMPIRBuilder`, a new function is created for the `TargetOp`. We also create a new `DISubprogram` for it. All the variables that were in the target region now have to be updated to have the correct scope. This after-the-fact updating of debug information becomes very difficult in certain cases. Take the example of variable-size arrays. The type of those arrays depends on the artificial `DILocalVariable`(s) which hold the size(s) of the array. This new function will now require that we generate new variables and new types. A similar issue exists for character type variables too. To avoid this after-the-fact updating, this PR generates a `DISubprogramAttr` for the `TargetOp` while generating the debug info in `flang`. Then we don't need to generate a `DISubprogram` in `OMPIRBuilder`. This change is made a bit more complicated by the fact that in the new scheme, the debug location already points to the new `DISubprogram` by the time it reaches `convertOmpTarget`. But we need some code generation in the parent function, so we have to carefully manage the debug locations. This fixes issue `#134991`.
--- Patch is 35.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146532.diff 14 Files Affected: - (modified) flang/lib/Optimizer/Transforms/AddDebugInfo.cpp (+116-1) - (added) flang/test/Transforms/debug-omp-target-op-1.fir (+40) - (added) flang/test/Transforms/debug-omp-target-op-2.fir (+53) - (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+8-38) - (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+21) - (added) mlir/test/Target/LLVMIR/omptarget-debug-empty.mlir (+27) - (modified) mlir/test/Target/LLVMIR/omptarget-debug-loop-loc.mlir (+3-1) - (modified) mlir/test/Target/LLVMIR/omptarget-debug-map-link-loc.mlir (+5-1) - (modified) mlir/test/Target/LLVMIR/omptarget-debug-nowait.mlir (+4-1) - (modified) mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir (+8-5) - (modified) mlir/test/Target/LLVMIR/omptarget-debug-var-2.mlir (+8-5) - (modified) mlir/test/Target/LLVMIR/omptarget-debug.mlir (+6-2) - (modified) mlir/test/Target/LLVMIR/omptarget-debug2.mlir (+6-2) - (modified) mlir/test/Target/LLVMIR/omptarget-parallel-llvm-debug.mlir (+6-2) ``````````diff diff --git a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp index 8fa2f38818c02..6eb914e67fd54 100644 --- a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp +++ b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp @@ -35,6 +35,7 @@ #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" @@ -104,6 +105,37 @@ bool debugInfoIsAlreadySet(mlir::Location loc) { return false; } +// Generates the name for the artificial DISubprogram that we are going to +// generate for omp::TargetOp. Its logic is borrowed from +// getTargetEntryUniqueInfo and +// TargetRegionEntryInfo::getTargetRegionEntryFnName to generate the same name. 
+// But even if there was a slight mismatch, it is not a problem because this +// name is artificial and not important to debug experience. +mlir::StringAttr getTargetFunctionName(mlir::MLIRContext *context, + mlir::Location Loc, + llvm::StringRef parentName) { + auto fileLoc = Loc->findInstanceOf(); + + assert(fileLoc && "No file found from location"); + llvm::StringRef fileName = fileLoc.getFilename().getValue(); + + llvm::sys::fs::UniqueID id; + uint64_t line = fileLoc.getLine(); + size_t fileId; + size_t deviceId; + if (auto ec = llvm::sys::fs::getUniqueID(fileName, id)) { + fileId = llvm::hash_value(fileName.str()); + deviceId = 0xdeadf17e; + } else { + fileId = id.getFile(); + deviceId = id.getDevice(); + } + return mlir::StringAttr::get( + context, + std::string(llvm::formatv("__omp_offloading_{0:x-}_{1:x-}_{2}_l{3}", + deviceId, fileId, parentName, line))); +} + } // namespace bool AddDebugInfoPass::createCommonBlockGlobal( @@ -446,6 +478,79 @@ void AddDebugInfoPass::handleFuncOp(mlir::func::FuncOp funcOp, line - 1, false); } + auto addTargetOpDISP = [&](bool lineTableOnly, + llvm::ArrayRef entities) { + // When we process the DeclareOp inside the OpenMP target region, all the + // variables get the DISubprogram of the parent function of the target op as + // the scope. In the codegen (to llvm ir), OpenMP target op results in the + // creation of a separate function. As the variables in the debug info have + // the DISubprogram of the parent function as the scope, the variables + // need to be updated at codegen time to avoid verification failures. + + // This updating after the fact becomes more and more difficult when types + // are dependent on local variables like in the case of variable size arrays + // or string. We not only have to generate new variables but also new types. 
+ // We can avoid this problem by generating a DISubprogramAttr here for the + // target op and make sure that all the variables inside the target region + // get the correct scope in the first place. + funcOp.walk([&](mlir::omp::TargetOp targetOp) { + unsigned line = getLineFromLoc(targetOp.getLoc()); + mlir::StringAttr name = + getTargetFunctionName(context, targetOp.getLoc(), funcOp.getName()); + mlir::LLVM::DISubprogramFlags flags = + mlir::LLVM::DISubprogramFlags::Definition | + mlir::LLVM::DISubprogramFlags::LocalToUnit; + if (isOptimized) + flags = flags | mlir::LLVM::DISubprogramFlags::Optimized; + + mlir::DistinctAttr id = + mlir::DistinctAttr::create(mlir::UnitAttr::get(context)); + llvm::SmallVector types; + types.push_back(mlir::LLVM::DINullTypeAttr::get(context)); + for (auto arg : targetOp.getRegion().getArguments()) { + auto tyAttr = typeGen.convertType(fir::unwrapRefType(arg.getType()), + fileAttr, cuAttr, /*declOp=*/nullptr); + types.push_back(tyAttr); + } + CC = llvm::dwarf::getCallingConvention("DW_CC_normal"); + mlir::LLVM::DISubroutineTypeAttr spTy = + mlir::LLVM::DISubroutineTypeAttr::get(context, CC, types); + if (lineTableOnly) { + auto spAttr = mlir::LLVM::DISubprogramAttr::get( + context, id, compilationUnit, Scope, name, name, funcFileAttr, line, + line, flags, spTy, /*retainedNodes=*/{}, /*annotations=*/{}); + targetOp->setLoc(builder.getFusedLoc({targetOp.getLoc()}, spAttr)); + return; + } + mlir::DistinctAttr recId = + mlir::DistinctAttr::create(mlir::UnitAttr::get(context)); + auto spAttr = mlir::LLVM::DISubprogramAttr::get( + context, recId, /*isRecSelf=*/true, id, compilationUnit, Scope, name, + name, funcFileAttr, line, line, flags, spTy, /*retainedNodes=*/{}, + /*annotations=*/{}); + + // Make sure that information about the imported modules is copied in the + // new function. 
+ llvm::SmallVector opEntities; + for (mlir::LLVM::DINodeAttr N : entities) { + if (auto entity = mlir::dyn_cast(N)) { + auto importedEntity = mlir::LLVM::DIImportedEntityAttr::get( + context, llvm::dwarf::DW_TAG_imported_module, spAttr, + entity.getEntity(), fileAttr, /*line=*/1, /*name=*/nullptr, + /*elements*/ {}); + opEntities.push_back(importedEntity); + } + } + + id = mlir::DistinctAttr::create(mlir::UnitAttr::get(context)); + spAttr = mlir::LLVM::DISubprogramAttr::get( + context, recId, /*isRecSelf=*/false, id, compilationUnit, Scope, name, + name, funcFileAttr, line, line, flags, spTy, opEntities, + /*annotations=*/{}); + targetOp->setLoc(builder.getFusedLoc({targetOp.getLoc()}, spAttr)); + }); + }; + // Don't process variables if user asked for line tables only. if (debugLevel == mlir::LLVM::DIEmissionKind::LineTablesOnly) { auto spAttr = mlir::LLVM::DISubprogramAttr::get( @@ -453,6 +558,7 @@ void AddDebugInfoPass::handleFuncOp(mlir::func::FuncOp funcOp, line, line, subprogramFlags, subTypeAttr, /*retainedNodes=*/{}, /*annotations=*/{}); funcOp->setLoc(builder.getFusedLoc({l}, spAttr)); + addTargetOpDISP(/*lineTableOnly=*/true, /*entities=*/{}); return; } @@ -510,9 +616,18 @@ void AddDebugInfoPass::handleFuncOp(mlir::func::FuncOp funcOp, funcName, fullName, funcFileAttr, line, line, subprogramFlags, subTypeAttr, entities, /*annotations=*/{}); funcOp->setLoc(builder.getFusedLoc({l}, spAttr)); + addTargetOpDISP(/*lineTableOnly=*/false, entities); funcOp.walk([&](fir::cg::XDeclareOp declOp) { - handleDeclareOp(declOp, fileAttr, spAttr, typeGen, symbolTable); + mlir::LLVM::DISubprogramAttr spTy = spAttr; + if (auto tOp = declOp->getParentOfType()) { + if (auto fusedLoc = llvm::dyn_cast(tOp.getLoc())) { + if (auto sp = llvm::dyn_cast( + fusedLoc.getMetadata())) + spTy = sp; + } + } + handleDeclareOp(declOp, fileAttr, spTy, typeGen, symbolTable); }); // commonBlockMap ensures that we don't create multiple DICommonBlockAttr of // the same name in one function. 
But it is ok (rather required) to create diff --git a/flang/test/Transforms/debug-omp-target-op-1.fir b/flang/test/Transforms/debug-omp-target-op-1.fir new file mode 100644 index 0000000000000..6b895b732c42b --- /dev/null +++ b/flang/test/Transforms/debug-omp-target-op-1.fir @@ -0,0 +1,40 @@ +// RUN: fir-opt --add-debug-info --mlir-print-debuginfo %s | FileCheck %s +// RUN: fir-opt --add-debug-info="debug-level=LineTablesOnly" --mlir-print-debuginfo %s | FileCheck %s --check-prefix=LINETABLE + +module attributes {dlti.dl_spec = #dlti.dl_spec<>} { + func.func @_QQmain() attributes {fir.bindc_name = "test"} { + %c13_i32 = arith.constant 13 : i32 + %c12_i32 = arith.constant 12 : i32 + %c6_i32 = arith.constant 6 : i32 + %c1_i32 = arith.constant 1 : i32 + %c5_i32 = arith.constant 5 : i32 + %0 = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"} loc(#loc1) + %1 = fircg.ext_declare %0 {uniq_name = "_QFEx"} : (!fir.ref) -> !fir.ref loc(#loc1) + %2 = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"} loc(#loc2) + %3 = fircg.ext_declare %2 {uniq_name = "_QFEy"} : (!fir.ref) -> !fir.ref loc(#loc2) + %4 = omp.map.info var_ptr(%1 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "x"} + %5 = omp.map.info var_ptr(%3 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "y"} + omp.target map_entries(%4 -> %arg0, %5 -> %arg1 : !fir.ref, !fir.ref) { + %16 = fircg.ext_declare %arg0 {uniq_name = "_QFEx"} : (!fir.ref) -> !fir.ref loc(#loc3) + %17 = fircg.ext_declare %arg1 {uniq_name = "_QFEy"} : (!fir.ref) -> !fir.ref loc(#loc4) + omp.terminator + } loc(#loc5) + return + } +} +#loc1 = loc("test.f90":1:1) +#loc2 = loc("test.f90":3:1) +#loc3 = loc("test.f90":7:1) +#loc4 = loc("test.f90":8:1) +#loc5 = loc("test.f90":6:1) + +// CHECK: #[[SP:.*]] = #llvm.di_subprogram<{{.*}}name = "test"{{.*}}> +// CHECK: #[[SP1:.*]] = #llvm.di_subprogram<{{.*}}name = "__omp_offloading_{{.*}}_QQmain_l6"{{.*}}line = 6{{.*}}subprogramFlags = 
"LocalToUnit|Definition"{{.*}}> +// CHECK: #llvm.di_local_variable +// CHECK: #llvm.di_local_variable +// CHECK: #llvm.di_local_variable +// CHECK: #llvm.di_local_variable + +// LINETABLE: #[[SP:.*]] = #llvm.di_subprogram<{{.*}}name = "test"{{.*}}> +// LINETABLE: #[[SP1:.*]] = #llvm.di_subprogram<{{.*}}name = "__omp_offloading_{{.*}}_QQmain_l6"{{.*}}line = 6{{.*}}subprogramFlags = "LocalToUnit|Definition"{{.*}}> +// LINETABLE-NOT: #llvm.di_local_variable diff --git a/flang/test/Transforms/debug-omp-target-op-2.fir b/flang/test/Transforms/debug-omp-target-op-2.fir new file mode 100644 index 0000000000000..15dcf2389b21d --- /dev/null +++ b/flang/test/Transforms/debug-omp-target-op-2.fir @@ -0,0 +1,53 @@ +// RUN: fir-opt --add-debug-info --mlir-print-debuginfo %s | FileCheck %s + +module attributes {dlti.dl_spec = #dlti.dl_spec<>} { + func.func @fn_(%arg0: !fir.ref> {fir.bindc_name = "b"}, %arg1: !fir.ref {fir.bindc_name = "c"}, %arg2: !fir.ref {fir.bindc_name = "d"}) { + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %0 = fir.alloca i32 + %1 = fir.alloca i32 + %2 = fir.undefined !fir.dscope + %3 = fircg.ext_declare %arg1 dummy_scope %2 {uniq_name = "_QFfnEc"} : (!fir.ref, !fir.dscope) -> !fir.ref loc(#loc2) + %4 = fircg.ext_declare %arg2 dummy_scope %2 {uniq_name = "_QFfnEd"} : (!fir.ref, !fir.dscope) -> !fir.ref loc(#loc3) + %5 = fir.load %3 : !fir.ref + %6 = fir.convert %5 : (i32) -> index + %9 = fir.load %4 : !fir.ref + %10 = fir.convert %9 : (i32) -> index + %15 = fircg.ext_declare %arg0(%6, %10) dummy_scope %2 {uniq_name = "_QFfnEb"} : (!fir.ref>, index, index, !fir.dscope) -> !fir.ref> loc(#loc4) + %16 = fircg.ext_embox %15(%6, %10) : (!fir.ref>, index, index) -> !fir.box> + %17:3 = fir.box_dims %16, %c0 : (!fir.box>, index) -> (index, index, index) + %18 = arith.subi %17#1, %c1 : index + %19 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%18 : index) extent(%17#1 : index) stride(%17#2 : index) start_idx(%c1 : index) {stride_in_bytes = 
true} + %20 = arith.muli %17#2, %17#1 : index + %21:3 = fir.box_dims %16, %c1 : (!fir.box>, index) -> (index, index, index) + %22 = arith.subi %21#1, %c1 : index + %23 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%22 : index) extent(%21#1 : index) stride(%20 : index) start_idx(%c1 : index) {stride_in_bytes = true} + %24 = omp.map.info var_ptr(%15 : !fir.ref>, i32) map_clauses(tofrom) capture(ByRef) bounds(%19, %23) -> !fir.ref> {name = "b"} + %25 = omp.map.info var_ptr(%1 : !fir.ref, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref {name = ""} + %26 = omp.map.info var_ptr(%0 : !fir.ref, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref {name = ""} + omp.target map_entries(%24 -> %arg3, %25 -> %arg4, %26 -> %arg5 : !fir.ref>, !fir.ref, !fir.ref) { + %27 = fir.load %arg5 : !fir.ref + %28 = fir.load %arg4 : !fir.ref + %29 = fir.convert %27 : (i32) -> index + %31 = fir.convert %28 : (i32) -> index + %37 = fircg.ext_declare %arg3(%29, %31) {uniq_name = "_QFfnEb"} : (!fir.ref>, index, index) -> !fir.ref> loc(#loc5) + omp.terminator + } loc(#loc6) + return + } loc(#loc7) +} +#loc1 = loc("test.f90":1:1) +#loc2 = loc("test.f90":3:1) +#loc3 = loc("test.f90":7:1) +#loc4 = loc("test.f90":8:1) +#loc5 = loc("test.f90":6:1) +#loc6 = loc("test.f90":16:1) +#loc7 = loc("test.f90":26:1) + + +// Test that variable size arrays inside target regions get their own +// compiler generated variables for size. 
+ +// CHECK: #[[SP:.*]] = #llvm.di_subprogram<{{.*}}name = "__omp_offloading_{{.*}}_fn__l16"{{.*}}> +// CHECK: #llvm.di_local_variable +// CHECK: #llvm.di_local_variable diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 85451b1233f96..db792a3b52d24 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -6891,23 +6891,19 @@ static void FixupDebugInfoForOutlinedFunction( if (!NewSP) return; - DenseMap Cache; SmallDenseMap RemappedVariables; auto GetUpdatedDIVariable = [&](DILocalVariable *OldVar, unsigned arg) { - auto NewSP = Func->getSubprogram(); DILocalVariable *&NewVar = RemappedVariables[OldVar]; // Only use cached variable if the arg number matches. This is important // so that DIVariable created for privatized variables are not discarded. if (NewVar && (arg == NewVar->getArg())) return NewVar; - DILocalScope *NewScope = DILocalScope::cloneScopeForSubprogram( - *OldVar->getScope(), *NewSP, Builder.getContext(), Cache); NewVar = llvm::DILocalVariable::get( - Builder.getContext(), NewScope, OldVar->getName(), OldVar->getFile(), - OldVar->getLine(), OldVar->getType(), arg, OldVar->getFlags(), - OldVar->getAlignInBits(), OldVar->getAnnotations()); + Builder.getContext(), OldVar->getScope(), OldVar->getName(), + OldVar->getFile(), OldVar->getLine(), OldVar->getType(), arg, + OldVar->getFlags(), OldVar->getAlignInBits(), OldVar->getAnnotations()); return NewVar; }; @@ -6921,7 +6917,8 @@ static void FixupDebugInfoForOutlinedFunction( ArgNo = std::get<1>(Iter->second) + 1; } } - DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo)); + if (ArgNo != 0) + DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo)); }; // The location and scope of variable intrinsics and records still point to @@ -7000,36 +6997,9 @@ static Expected createOutlinedFunction( // Save insert point. 
IRBuilder<>::InsertPointGuard IPG(Builder); - // If there's a DISubprogram associated with current function, then - // generate one for the outlined function. - if (Function *ParentFunc = BB->getParent()) { - if (DISubprogram *SP = ParentFunc->getSubprogram()) { - DICompileUnit *CU = SP->getUnit(); - DIBuilder DB(*M, true, CU); - DebugLoc DL = Builder.getCurrentDebugLocation(); - if (DL) { - // TODO: We are using nullopt for arguments at the moment. This will - // need to be updated when debug data is being generated for variables. - DISubroutineType *Ty = - DB.createSubroutineType(DB.getOrCreateTypeArray({})); - DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagDefinition | - DISubprogram::SPFlagOptimized | - DISubprogram::SPFlagLocalToUnit; - - DISubprogram *OutlinedSP = DB.createFunction( - CU, FuncName, FuncName, SP->getFile(), DL.getLine(), Ty, - DL.getLine(), DINode::DIFlags::FlagArtificial, SPFlags); - - // Attach subprogram to the function. - Func->setSubprogram(OutlinedSP); - // Update the CurrentDebugLocation in the builder so that right scope - // is used for things inside outlined function. - Builder.SetCurrentDebugLocation( - DILocation::get(Func->getContext(), DL.getLine(), DL.getCol(), - OutlinedSP, DL.getInlinedAt())); - } - } - } + // We will generate the entries in the outlined function but the debug + // location may still be pointing to the parent function. Reset it now. + Builder.SetCurrentDebugLocation(llvm::DebugLoc()); // Generate the region into the function. 
BasicBlock *EntryBB = BasicBlock::Create(Builder.getContext(), "entry", Func); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 3806db3ceab25..c1e1fec3ddef8 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -5324,9 +5324,27 @@ static LogicalResult convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { auto targetOp = cast(opInst); + // The current debug location already has the DISubprogram for the outlined + // function that will be created for the target op. We save it here so that + // we can set it on the outlined function. + llvm::DebugLoc outlinedFnLoc = builder.getCurrentDebugLocation(); if (failed(checkImplementationStatus(opInst))) return failure(); + // During the handling of target op, we will generate instructions in the + // parent function like call to the oulined function or branch to a new + // BasicBlock. We set the debug location here to parent function so that those + // get the correct debug locations. For outlined functions, the normal MLIR op + // conversion will automatically pick the correct location. 
+ llvm::BasicBlock *parentBB = builder.GetInsertBlock(); + assert(parentBB && "No insert block is set for the builder"); + llvm::Function *parentLLVMFn = parentBB->getParent(); + assert(parentLLVMFn && "Parent Function must be valid"); + if (llvm::DISubprogram *SP = parentLLVMFn->getSubprogram()) + builder.SetCurrentDebugLocation(llvm::DILocation::get( + parentLLVMFn->getContext(), outlinedFnLoc.getLine(), + outlinedFnLoc.getCol(), SP, outlinedFnLoc.getInlinedAt())); + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); bool isTargetDevice = ompBuilder->Config.isTargetDevice(); bool isGPU = ompBuilder->Config.isGPU(); @@ -5420,6 +5438,9 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, assert(llvmParentFn && llvmOutlinedFn && "Both parent and outlined functions must exist at this point"); + if (outlinedFnLoc && llvmParentFn->getSubprogram()) + llvmOutlinedFn->setSubprogram(outlinedFnLoc->getScope()->getSubprogram()); + if (auto attr = llvmParentFn->getFnAttribute("target-cpu"); attr.isStringAttribute()) llvmOutlinedFn->addFnAttr(attr); diff --git a/mlir/test/Target/LLVMIR/omptarget-debug-empty.mlir b/mlir/test/Target/LLVMIR/omptarget-debug-empty.mlir new file mode 100644 index 0000000000000..45e5d2612e2c2 ---... [truncated] ``````````
https://github.com/llvm/llvm-project/pull/146532 From flang-commits at lists.llvm.org Tue Jul 1 06:58:52 2025 From: flang-commits at lists.llvm.org (via flang-commits) Date: Tue, 01 Jul 2025 06:58:52 -0700 (PDT) Subject: [flang-commits] [flang] [llvm] [mlir] [flang][debug] Generate DISubprogramAttr for omp::TargetOp. (PR #146532) In-Reply-To: Message-ID: <6863e99c.170a0220.1a32be.06de@mx.google.com> llvmbot wrote: @llvm/pr-subscribers-flang-openmp Author: Abid Qadeer (abidh)
Changes This is a combination of https://github.com/llvm/llvm-project/pull/138149 and https://github.com/llvm/llvm-project/pull/138039 which were opened separately for ease of reviewing. The only other change is adjustments in 2 tests which have gone in since. There are `DeclareOp`s present for the variables mapped into the target region. That allows us to generate debug information for them. But the `TargetOp` is still part of the parent function and those variables get the parent function's `DISubprogram` as a scope. In `OMPIRBuilder`, a new function is created for the `TargetOp`. We also create a new `DISubprogram` for it. All the variables that were in the target region now have to be updated to have the correct scope. This after-the-fact updating of debug information becomes very difficult in certain cases. Take the example of variable-size arrays. The type of those arrays depends on the artificial `DILocalVariable`(s) which hold the size(s) of the array. This new function will now require that we generate the new variable and new types. A similar issue exists for character type variables too. To avoid this after-the-fact updating, this PR generates a `DISubprogramAttr` for the `TargetOp` while generating the debug info in `flang`. Then we don't need to generate a `DISubprogram` in `OMPIRBuilder`. This change is made a bit more complicated by the fact that in the new scheme, the debug location already points to the new `DISubprogram` by the time it reaches `convertOmpTarget`. But we need some code generation in the parent function so we have to carefully manage the debug locations. This fixes issue `#134991`. 
--- Patch is 35.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146532.diff 14 Files Affected: - (modified) flang/lib/Optimizer/Transforms/AddDebugInfo.cpp (+116-1) - (added) flang/test/Transforms/debug-omp-target-op-1.fir (+40) - (added) flang/test/Transforms/debug-omp-target-op-2.fir (+53) - (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+8-38) - (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+21) - (added) mlir/test/Target/LLVMIR/omptarget-debug-empty.mlir (+27) - (modified) mlir/test/Target/LLVMIR/omptarget-debug-loop-loc.mlir (+3-1) - (modified) mlir/test/Target/LLVMIR/omptarget-debug-map-link-loc.mlir (+5-1) - (modified) mlir/test/Target/LLVMIR/omptarget-debug-nowait.mlir (+4-1) - (modified) mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir (+8-5) - (modified) mlir/test/Target/LLVMIR/omptarget-debug-var-2.mlir (+8-5) - (modified) mlir/test/Target/LLVMIR/omptarget-debug.mlir (+6-2) - (modified) mlir/test/Target/LLVMIR/omptarget-debug2.mlir (+6-2) - (modified) mlir/test/Target/LLVMIR/omptarget-parallel-llvm-debug.mlir (+6-2) ``````````diff diff --git a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp index 8fa2f38818c02..6eb914e67fd54 100644 --- a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp +++ b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp @@ -35,6 +35,7 @@ #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" @@ -104,6 +105,37 @@ bool debugInfoIsAlreadySet(mlir::Location loc) { return false; } +// Generates the name for the artificial DISubprogram that we are going to +// generate for omp::TargetOp. Its logic is borrowed from +// getTargetEntryUniqueInfo and +// TargetRegionEntryInfo::getTargetRegionEntryFnName to generate the same name. 
+// But even if there was a slight mismatch, it is not a problem because this +// name is artificial and not important to debug experience. +mlir::StringAttr getTargetFunctionName(mlir::MLIRContext *context, + mlir::Location Loc, + llvm::StringRef parentName) { + auto fileLoc = Loc->findInstanceOf(); + + assert(fileLoc && "No file found from location"); + llvm::StringRef fileName = fileLoc.getFilename().getValue(); + + llvm::sys::fs::UniqueID id; + uint64_t line = fileLoc.getLine(); + size_t fileId; + size_t deviceId; + if (auto ec = llvm::sys::fs::getUniqueID(fileName, id)) { + fileId = llvm::hash_value(fileName.str()); + deviceId = 0xdeadf17e; + } else { + fileId = id.getFile(); + deviceId = id.getDevice(); + } + return mlir::StringAttr::get( + context, + std::string(llvm::formatv("__omp_offloading_{0:x-}_{1:x-}_{2}_l{3}", + deviceId, fileId, parentName, line))); +} + } // namespace bool AddDebugInfoPass::createCommonBlockGlobal( @@ -446,6 +478,79 @@ void AddDebugInfoPass::handleFuncOp(mlir::func::FuncOp funcOp, line - 1, false); } + auto addTargetOpDISP = [&](bool lineTableOnly, + llvm::ArrayRef entities) { + // When we process the DeclareOp inside the OpenMP target region, all the + // variables get the DISubprogram of the parent function of the target op as + // the scope. In the codegen (to llvm ir), OpenMP target op results in the + // creation of a separate function. As the variables in the debug info have + // the DISubprogram of the parent function as the scope, the variables + // need to be updated at codegen time to avoid verification failures. + + // This updating after the fact becomes more and more difficult when types + // are dependent on local variables like in the case of variable size arrays + // or string. We not only have to generate new variables but also new types. 
+ // We can avoid this problem by generating a DISubprogramAttr here for the + // target op and make sure that all the variables inside the target region + // get the correct scope in the first place. + funcOp.walk([&](mlir::omp::TargetOp targetOp) { + unsigned line = getLineFromLoc(targetOp.getLoc()); + mlir::StringAttr name = + getTargetFunctionName(context, targetOp.getLoc(), funcOp.getName()); + mlir::LLVM::DISubprogramFlags flags = + mlir::LLVM::DISubprogramFlags::Definition | + mlir::LLVM::DISubprogramFlags::LocalToUnit; + if (isOptimized) + flags = flags | mlir::LLVM::DISubprogramFlags::Optimized; + + mlir::DistinctAttr id = + mlir::DistinctAttr::create(mlir::UnitAttr::get(context)); + llvm::SmallVector types; + types.push_back(mlir::LLVM::DINullTypeAttr::get(context)); + for (auto arg : targetOp.getRegion().getArguments()) { + auto tyAttr = typeGen.convertType(fir::unwrapRefType(arg.getType()), + fileAttr, cuAttr, /*declOp=*/nullptr); + types.push_back(tyAttr); + } + CC = llvm::dwarf::getCallingConvention("DW_CC_normal"); + mlir::LLVM::DISubroutineTypeAttr spTy = + mlir::LLVM::DISubroutineTypeAttr::get(context, CC, types); + if (lineTableOnly) { + auto spAttr = mlir::LLVM::DISubprogramAttr::get( + context, id, compilationUnit, Scope, name, name, funcFileAttr, line, + line, flags, spTy, /*retainedNodes=*/{}, /*annotations=*/{}); + targetOp->setLoc(builder.getFusedLoc({targetOp.getLoc()}, spAttr)); + return; + } + mlir::DistinctAttr recId = + mlir::DistinctAttr::create(mlir::UnitAttr::get(context)); + auto spAttr = mlir::LLVM::DISubprogramAttr::get( + context, recId, /*isRecSelf=*/true, id, compilationUnit, Scope, name, + name, funcFileAttr, line, line, flags, spTy, /*retainedNodes=*/{}, + /*annotations=*/{}); + + // Make sure that information about the imported modules is copied in the + // new function. 
+ llvm::SmallVector opEntities; + for (mlir::LLVM::DINodeAttr N : entities) { + if (auto entity = mlir::dyn_cast(N)) { + auto importedEntity = mlir::LLVM::DIImportedEntityAttr::get( + context, llvm::dwarf::DW_TAG_imported_module, spAttr, + entity.getEntity(), fileAttr, /*line=*/1, /*name=*/nullptr, + /*elements*/ {}); + opEntities.push_back(importedEntity); + } + } + + id = mlir::DistinctAttr::create(mlir::UnitAttr::get(context)); + spAttr = mlir::LLVM::DISubprogramAttr::get( + context, recId, /*isRecSelf=*/false, id, compilationUnit, Scope, name, + name, funcFileAttr, line, line, flags, spTy, opEntities, + /*annotations=*/{}); + targetOp->setLoc(builder.getFusedLoc({targetOp.getLoc()}, spAttr)); + }); + }; + // Don't process variables if user asked for line tables only. if (debugLevel == mlir::LLVM::DIEmissionKind::LineTablesOnly) { auto spAttr = mlir::LLVM::DISubprogramAttr::get( @@ -453,6 +558,7 @@ void AddDebugInfoPass::handleFuncOp(mlir::func::FuncOp funcOp, line, line, subprogramFlags, subTypeAttr, /*retainedNodes=*/{}, /*annotations=*/{}); funcOp->setLoc(builder.getFusedLoc({l}, spAttr)); + addTargetOpDISP(/*lineTableOnly=*/true, /*entities=*/{}); return; } @@ -510,9 +616,18 @@ void AddDebugInfoPass::handleFuncOp(mlir::func::FuncOp funcOp, funcName, fullName, funcFileAttr, line, line, subprogramFlags, subTypeAttr, entities, /*annotations=*/{}); funcOp->setLoc(builder.getFusedLoc({l}, spAttr)); + addTargetOpDISP(/*lineTableOnly=*/false, entities); funcOp.walk([&](fir::cg::XDeclareOp declOp) { - handleDeclareOp(declOp, fileAttr, spAttr, typeGen, symbolTable); + mlir::LLVM::DISubprogramAttr spTy = spAttr; + if (auto tOp = declOp->getParentOfType()) { + if (auto fusedLoc = llvm::dyn_cast(tOp.getLoc())) { + if (auto sp = llvm::dyn_cast( + fusedLoc.getMetadata())) + spTy = sp; + } + } + handleDeclareOp(declOp, fileAttr, spTy, typeGen, symbolTable); }); // commonBlockMap ensures that we don't create multiple DICommonBlockAttr of // the same name in one function. 
But it is ok (rather required) to create diff --git a/flang/test/Transforms/debug-omp-target-op-1.fir b/flang/test/Transforms/debug-omp-target-op-1.fir new file mode 100644 index 0000000000000..6b895b732c42b --- /dev/null +++ b/flang/test/Transforms/debug-omp-target-op-1.fir @@ -0,0 +1,40 @@ +// RUN: fir-opt --add-debug-info --mlir-print-debuginfo %s | FileCheck %s +// RUN: fir-opt --add-debug-info="debug-level=LineTablesOnly" --mlir-print-debuginfo %s | FileCheck %s --check-prefix=LINETABLE + +module attributes {dlti.dl_spec = #dlti.dl_spec<>} { + func.func @_QQmain() attributes {fir.bindc_name = "test"} { + %c13_i32 = arith.constant 13 : i32 + %c12_i32 = arith.constant 12 : i32 + %c6_i32 = arith.constant 6 : i32 + %c1_i32 = arith.constant 1 : i32 + %c5_i32 = arith.constant 5 : i32 + %0 = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"} loc(#loc1) + %1 = fircg.ext_declare %0 {uniq_name = "_QFEx"} : (!fir.ref) -> !fir.ref loc(#loc1) + %2 = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"} loc(#loc2) + %3 = fircg.ext_declare %2 {uniq_name = "_QFEy"} : (!fir.ref) -> !fir.ref loc(#loc2) + %4 = omp.map.info var_ptr(%1 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "x"} + %5 = omp.map.info var_ptr(%3 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "y"} + omp.target map_entries(%4 -> %arg0, %5 -> %arg1 : !fir.ref, !fir.ref) { + %16 = fircg.ext_declare %arg0 {uniq_name = "_QFEx"} : (!fir.ref) -> !fir.ref loc(#loc3) + %17 = fircg.ext_declare %arg1 {uniq_name = "_QFEy"} : (!fir.ref) -> !fir.ref loc(#loc4) + omp.terminator + } loc(#loc5) + return + } +} +#loc1 = loc("test.f90":1:1) +#loc2 = loc("test.f90":3:1) +#loc3 = loc("test.f90":7:1) +#loc4 = loc("test.f90":8:1) +#loc5 = loc("test.f90":6:1) + +// CHECK: #[[SP:.*]] = #llvm.di_subprogram<{{.*}}name = "test"{{.*}}> +// CHECK: #[[SP1:.*]] = #llvm.di_subprogram<{{.*}}name = "__omp_offloading_{{.*}}_QQmain_l6"{{.*}}line = 6{{.*}}subprogramFlags = 
"LocalToUnit|Definition"{{.*}}> +// CHECK: #llvm.di_local_variable +// CHECK: #llvm.di_local_variable +// CHECK: #llvm.di_local_variable +// CHECK: #llvm.di_local_variable + +// LINETABLE: #[[SP:.*]] = #llvm.di_subprogram<{{.*}}name = "test"{{.*}}> +// LINETABLE: #[[SP1:.*]] = #llvm.di_subprogram<{{.*}}name = "__omp_offloading_{{.*}}_QQmain_l6"{{.*}}line = 6{{.*}}subprogramFlags = "LocalToUnit|Definition"{{.*}}> +// LINETABLE-NOT: #llvm.di_local_variable diff --git a/flang/test/Transforms/debug-omp-target-op-2.fir b/flang/test/Transforms/debug-omp-target-op-2.fir new file mode 100644 index 0000000000000..15dcf2389b21d --- /dev/null +++ b/flang/test/Transforms/debug-omp-target-op-2.fir @@ -0,0 +1,53 @@ +// RUN: fir-opt --add-debug-info --mlir-print-debuginfo %s | FileCheck %s + +module attributes {dlti.dl_spec = #dlti.dl_spec<>} { + func.func @fn_(%arg0: !fir.ref> {fir.bindc_name = "b"}, %arg1: !fir.ref {fir.bindc_name = "c"}, %arg2: !fir.ref {fir.bindc_name = "d"}) { + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %0 = fir.alloca i32 + %1 = fir.alloca i32 + %2 = fir.undefined !fir.dscope + %3 = fircg.ext_declare %arg1 dummy_scope %2 {uniq_name = "_QFfnEc"} : (!fir.ref, !fir.dscope) -> !fir.ref loc(#loc2) + %4 = fircg.ext_declare %arg2 dummy_scope %2 {uniq_name = "_QFfnEd"} : (!fir.ref, !fir.dscope) -> !fir.ref loc(#loc3) + %5 = fir.load %3 : !fir.ref + %6 = fir.convert %5 : (i32) -> index + %9 = fir.load %4 : !fir.ref + %10 = fir.convert %9 : (i32) -> index + %15 = fircg.ext_declare %arg0(%6, %10) dummy_scope %2 {uniq_name = "_QFfnEb"} : (!fir.ref>, index, index, !fir.dscope) -> !fir.ref> loc(#loc4) + %16 = fircg.ext_embox %15(%6, %10) : (!fir.ref>, index, index) -> !fir.box> + %17:3 = fir.box_dims %16, %c0 : (!fir.box>, index) -> (index, index, index) + %18 = arith.subi %17#1, %c1 : index + %19 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%18 : index) extent(%17#1 : index) stride(%17#2 : index) start_idx(%c1 : index) {stride_in_bytes = 
true} + %20 = arith.muli %17#2, %17#1 : index + %21:3 = fir.box_dims %16, %c1 : (!fir.box>, index) -> (index, index, index) + %22 = arith.subi %21#1, %c1 : index + %23 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%22 : index) extent(%21#1 : index) stride(%20 : index) start_idx(%c1 : index) {stride_in_bytes = true} + %24 = omp.map.info var_ptr(%15 : !fir.ref>, i32) map_clauses(tofrom) capture(ByRef) bounds(%19, %23) -> !fir.ref> {name = "b"} + %25 = omp.map.info var_ptr(%1 : !fir.ref, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref {name = ""} + %26 = omp.map.info var_ptr(%0 : !fir.ref, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref {name = ""} + omp.target map_entries(%24 -> %arg3, %25 -> %arg4, %26 -> %arg5 : !fir.ref>, !fir.ref, !fir.ref) { + %27 = fir.load %arg5 : !fir.ref + %28 = fir.load %arg4 : !fir.ref + %29 = fir.convert %27 : (i32) -> index + %31 = fir.convert %28 : (i32) -> index + %37 = fircg.ext_declare %arg3(%29, %31) {uniq_name = "_QFfnEb"} : (!fir.ref>, index, index) -> !fir.ref> loc(#loc5) + omp.terminator + } loc(#loc6) + return + } loc(#loc7) +} +#loc1 = loc("test.f90":1:1) +#loc2 = loc("test.f90":3:1) +#loc3 = loc("test.f90":7:1) +#loc4 = loc("test.f90":8:1) +#loc5 = loc("test.f90":6:1) +#loc6 = loc("test.f90":16:1) +#loc7 = loc("test.f90":26:1) + + +// Test that variable size arrays inside target regions get their own +// compiler generated variables for size. 
+ +// CHECK: #[[SP:.*]] = #llvm.di_subprogram<{{.*}}name = "__omp_offloading_{{.*}}_fn__l16"{{.*}}> +// CHECK: #llvm.di_local_variable +// CHECK: #llvm.di_local_variable diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 85451b1233f96..db792a3b52d24 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -6891,23 +6891,19 @@ static void FixupDebugInfoForOutlinedFunction( if (!NewSP) return; - DenseMap Cache; SmallDenseMap RemappedVariables; auto GetUpdatedDIVariable = [&](DILocalVariable *OldVar, unsigned arg) { - auto NewSP = Func->getSubprogram(); DILocalVariable *&NewVar = RemappedVariables[OldVar]; // Only use cached variable if the arg number matches. This is important // so that DIVariable created for privatized variables are not discarded. if (NewVar && (arg == NewVar->getArg())) return NewVar; - DILocalScope *NewScope = DILocalScope::cloneScopeForSubprogram( - *OldVar->getScope(), *NewSP, Builder.getContext(), Cache); NewVar = llvm::DILocalVariable::get( - Builder.getContext(), NewScope, OldVar->getName(), OldVar->getFile(), - OldVar->getLine(), OldVar->getType(), arg, OldVar->getFlags(), - OldVar->getAlignInBits(), OldVar->getAnnotations()); + Builder.getContext(), OldVar->getScope(), OldVar->getName(), + OldVar->getFile(), OldVar->getLine(), OldVar->getType(), arg, + OldVar->getFlags(), OldVar->getAlignInBits(), OldVar->getAnnotations()); return NewVar; }; @@ -6921,7 +6917,8 @@ static void FixupDebugInfoForOutlinedFunction( ArgNo = std::get<1>(Iter->second) + 1; } } - DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo)); + if (ArgNo != 0) + DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo)); }; // The location and scope of variable intrinsics and records still point to @@ -7000,36 +6997,9 @@ static Expected createOutlinedFunction( // Save insert point. 
IRBuilder<>::InsertPointGuard IPG(Builder); - // If there's a DISubprogram associated with current function, then - // generate one for the outlined function. - if (Function *ParentFunc = BB->getParent()) { - if (DISubprogram *SP = ParentFunc->getSubprogram()) { - DICompileUnit *CU = SP->getUnit(); - DIBuilder DB(*M, true, CU); - DebugLoc DL = Builder.getCurrentDebugLocation(); - if (DL) { - // TODO: We are using nullopt for arguments at the moment. This will - // need to be updated when debug data is being generated for variables. - DISubroutineType *Ty = - DB.createSubroutineType(DB.getOrCreateTypeArray({})); - DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagDefinition | - DISubprogram::SPFlagOptimized | - DISubprogram::SPFlagLocalToUnit; - - DISubprogram *OutlinedSP = DB.createFunction( - CU, FuncName, FuncName, SP->getFile(), DL.getLine(), Ty, - DL.getLine(), DINode::DIFlags::FlagArtificial, SPFlags); - - // Attach subprogram to the function. - Func->setSubprogram(OutlinedSP); - // Update the CurrentDebugLocation in the builder so that right scope - // is used for things inside outlined function. - Builder.SetCurrentDebugLocation( - DILocation::get(Func->getContext(), DL.getLine(), DL.getCol(), - OutlinedSP, DL.getInlinedAt())); - } - } - } + // We will generate the entries in the outlined function but the debug + // location may still be pointing to the parent function. Reset it now. + Builder.SetCurrentDebugLocation(llvm::DebugLoc()); // Generate the region into the function. 
BasicBlock *EntryBB = BasicBlock::Create(Builder.getContext(), "entry", Func); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 3806db3ceab25..c1e1fec3ddef8 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -5324,9 +5324,27 @@ static LogicalResult convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { auto targetOp = cast(opInst); + // The current debug location already has the DISubprogram for the outlined + // function that will be created for the target op. We save it here so that + // we can set it on the outlined function. + llvm::DebugLoc outlinedFnLoc = builder.getCurrentDebugLocation(); if (failed(checkImplementationStatus(opInst))) return failure(); + // During the handling of target op, we will generate instructions in the + // parent function like call to the outlined function or branch to a new + // BasicBlock. We set the debug location here to parent function so that those + // get the correct debug locations. For outlined functions, the normal MLIR op + // conversion will automatically pick the correct location. 
+ llvm::BasicBlock *parentBB = builder.GetInsertBlock(); + assert(parentBB && "No insert block is set for the builder"); + llvm::Function *parentLLVMFn = parentBB->getParent(); + assert(parentLLVMFn && "Parent Function must be valid"); + if (llvm::DISubprogram *SP = parentLLVMFn->getSubprogram()) + builder.SetCurrentDebugLocation(llvm::DILocation::get( + parentLLVMFn->getContext(), outlinedFnLoc.getLine(), + outlinedFnLoc.getCol(), SP, outlinedFnLoc.getInlinedAt())); + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); bool isTargetDevice = ompBuilder->Config.isTargetDevice(); bool isGPU = ompBuilder->Config.isGPU(); @@ -5420,6 +5438,9 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, assert(llvmParentFn && llvmOutlinedFn && "Both parent and outlined functions must exist at this point"); + if (outlinedFnLoc && llvmParentFn->getSubprogram()) + llvmOutlinedFn->setSubprogram(outlinedFnLoc->getScope()->getSubprogram()); + if (auto attr = llvmParentFn->getFnAttribute("target-cpu"); attr.isStringAttribute()) llvmOutlinedFn->addFnAttr(attr); diff --git a/mlir/test/Target/LLVMIR/omptarget-debug-empty.mlir b/mlir/test/Target/LLVMIR/omptarget-debug-empty.mlir new file mode 100644 index 0000000000000..45e5d2612e2c2 ---... [truncated] ``````````
https://github.com/llvm/llvm-project/pull/146532 From flang-commits at lists.llvm.org Tue Jul 1 07:00:53 2025 From: flang-commits at lists.llvm.org (Paul Walker via flang-commits) Date: Tue, 01 Jul 2025 07:00:53 -0700 (PDT) Subject: [flang-commits] [clang] [flang] [Flang][AArch64][VecLib] Add libmvec support for Flang/AArch64 (PR #146453) In-Reply-To: Message-ID: <6863ea15.050a0220.3c7d74.670f@mx.google.com> ================ @@ -3487,7 +3487,8 @@ def fno_experimental_isel : Flag<["-"], "fno-experimental-isel">, Group; def fveclib : Joined<["-"], "fveclib=">, Group, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, - HelpText<"Use the given vector functions library">, + HelpText<"Use the given vector functions library.\n" + " Note: -fveclib=libmvec on AArch64 requires GLIBC 2.40 or newer.">, ---------------- paulwalker-arm wrote: The toolchain user will see a link time failure when trying to resolve a call to a vector math routine. Note: the exact requirement depends on the function in question, but version 2.40 is what was used to decide which math functions to add mappings for. https://github.com/llvm/llvm-project/pull/146453 From flang-commits at lists.llvm.org Tue Jul 1 07:01:01 2025 From: flang-commits at lists.llvm.org (Abid Qadeer via flang-commits) Date: Tue, 01 Jul 2025 07:01:01 -0700 (PDT) Subject: [flang-commits] [flang] [flang][debug] Generate DISubprogramAttr for omp::TargetOp. (PR #138039) In-Reply-To: Message-ID: <6863ea1d.050a0220.122345.6a74@mx.google.com> https://github.com/abidh closed https://github.com/llvm/llvm-project/pull/138039 From flang-commits at lists.llvm.org Tue Jul 1 07:01:03 2025 From: flang-commits at lists.llvm.org (Abid Qadeer via flang-commits) Date: Tue, 01 Jul 2025 07:01:03 -0700 (PDT) Subject: [flang-commits] [flang] [flang][debug] Generate DISubprogramAttr for omp::TargetOp. 
(PR #138039) In-Reply-To: Message-ID: <6863ea1f.170a0220.f9435.c8da@mx.google.com> abidh wrote: I have opened https://github.com/llvm/llvm-project/pull/146532 which is a combination of https://github.com/llvm/llvm-project/pull/138039 and https://github.com/llvm/llvm-project/pull/138149. Closing it. https://github.com/llvm/llvm-project/pull/138039 From flang-commits at lists.llvm.org Tue Jul 1 07:01:48 2025 From: flang-commits at lists.llvm.org (Paul Walker via flang-commits) Date: Tue, 01 Jul 2025 07:01:48 -0700 (PDT) Subject: [flang-commits] [clang] [flang] [Flang][AArch64][VecLib] Add libmvec support for Flang/AArch64 (PR #146453) In-Reply-To: Message-ID: <6863ea4c.170a0220.dc008.c25e@mx.google.com> https://github.com/paulwalker-arm edited https://github.com/llvm/llvm-project/pull/146453 From flang-commits at lists.llvm.org Tue Jul 1 07:03:42 2025 From: flang-commits at lists.llvm.org (Kajetan Puchalski via flang-commits) Date: Tue, 01 Jul 2025 07:03:42 -0700 (PDT) Subject: [flang-commits] [flang] [flang][tco] Add -emit-final-mlir flag (PR #146533) Message-ID: https://github.com/mrkajetanp created https://github.com/llvm/llvm-project/pull/146533 Add a flag to tco for emitting the final MLIR, prior to lowering to LLVM IR. This is intended to produce output that can be passed directly to mlir-translate. >From 072a3cfd4fdfcb43f7cb292879b6daf5f8540de5 Mon Sep 17 00:00:00 2001 From: Kajetan Puchalski Date: Tue, 1 Jul 2025 13:56:43 +0000 Subject: [PATCH] [flang][tco] Add -emit-final-mlir flag Add a flag to tco for emitting the final MLIR, prior to lowering to LLVM IR. This is intended to produce output that can be passed directly to mlir-translate. 
Signed-off-by: Kajetan Puchalski --- flang/tools/tco/tco.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/flang/tools/tco/tco.cpp b/flang/tools/tco/tco.cpp index eaf4bae088454..806cd3148d09f 100644 --- a/flang/tools/tco/tco.cpp +++ b/flang/tools/tco/tco.cpp @@ -70,6 +70,11 @@ static cl::opt codeGenLLVM( cl::desc("Run only CodeGen passes and translate FIR to LLVM IR"), cl::init(false)); +static cl::opt emitFinalMLIR( + "emit-final-mlir", + cl::desc("Only translate FIR to MLIR, do not lower to LLVM IR"), + cl::init(false)); + #include "flang/Optimizer/Passes/CommandLineOpts.h" #include "flang/Optimizer/Passes/Pipelines.h" @@ -149,13 +154,15 @@ compileFIR(const mlir::PassPipelineCLParser &passPipeline) { fir::registerDefaultInlinerPass(config); fir::createMLIRToLLVMPassPipeline(pm, config); } - fir::addLLVMDialectToLLVMPass(pm, out.os()); + if (!emitFinalMLIR) + fir::addLLVMDialectToLLVMPass(pm, out.os()); } // run the pass manager if (mlir::succeeded(pm.run(*owningRef))) { // passes ran successfully, so keep the output - if ((emitFir || passPipeline.hasAnyOccurrences()) && !codeGenLLVM) + if ((emitFir || passPipeline.hasAnyOccurrences() || emitFinalMLIR) && + !codeGenLLVM) printModule(*owningRef, out.os()); out.keep(); return mlir::success(); From flang-commits at lists.llvm.org Tue Jul 1 07:05:08 2025 From: flang-commits at lists.llvm.org (Kajetan Puchalski via flang-commits) Date: Tue, 01 Jul 2025 07:05:08 -0700 (PDT) Subject: [flang-commits] [flang] [flang][tco] Add -emit-final-mlir flag (PR #146533) In-Reply-To: Message-ID: <6863eb14.050a0220.20a40c.4c6f@mx.google.com> mrkajetanp wrote: I can also rename to just `-emit-mlir` for brevity, but I wasn't sure whether that'd be confusing or not on account of it all technically being mlir. 
https://github.com/llvm/llvm-project/pull/146533 From flang-commits at lists.llvm.org Tue Jul 1 07:08:37 2025 From: flang-commits at lists.llvm.org (Akash Banerjee via flang-commits) Date: Tue, 01 Jul 2025 07:08:37 -0700 (PDT) Subject: [flang-commits] [flang] [mlir] [MLIR] Add ComplexTOROCDL pass (PR #144926) In-Reply-To: Message-ID: <6863ebe5.170a0220.2e613c.b97a@mx.google.com> TIFitis wrote: Polite reminder for review, thanks. https://github.com/llvm/llvm-project/pull/144926 From flang-commits at lists.llvm.org Tue Jul 1 07:09:55 2025 From: flang-commits at lists.llvm.org (Akash Banerjee via flang-commits) Date: Tue, 01 Jul 2025 07:09:55 -0700 (PDT) Subject: [flang-commits] [flang] [OpenMP] Add TargetAMDGPU support for Complex argument and return types (PR #144924) In-Reply-To: Message-ID: <6863ec33.050a0220.15ac0f.5414@mx.google.com> TIFitis wrote: Polite reminder for review, thanks. https://github.com/llvm/llvm-project/pull/144924 From flang-commits at lists.llvm.org Tue Jul 1 07:11:33 2025 From: flang-commits at lists.llvm.org (Tom Eccles via flang-commits) Date: Tue, 01 Jul 2025 07:11:33 -0700 (PDT) Subject: [flang-commits] [flang] [flang][tco] Add -emit-final-mlir flag (PR #146533) In-Reply-To: Message-ID: <6863ec95.630a0220.86e02.9f13@mx.google.com> https://github.com/tblah commented: Great to see this, I'm sure I won't be the only one who can save time not having to dump mlir after every pass. I think the name is good. Please could you add a test that uses this flag and checks that what it gets is LLVM dialect MLIR. I'll add some other reviewers who often work on LLVMIR codegen as they may also find this useful. 
https://github.com/llvm/llvm-project/pull/146533 From flang-commits at lists.llvm.org Tue Jul 1 07:14:25 2025 From: flang-commits at lists.llvm.org (Krzysztof Parzyszek via flang-commits) Date: Tue, 01 Jul 2025 07:14:25 -0700 (PDT) Subject: [flang-commits] [flang] [flang][OpenMP] Rewrite min/max with more than 2 arguments (PR #146423) In-Reply-To: Message-ID: <6863ed41.170a0220.3008c9.d840@mx.google.com> https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/146423 >From 4cfcf45d8250d648bd4ae0a9110b5034d2495149 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 20 Jun 2025 06:48:18 -0500 Subject: [PATCH 1/5] [flang][OpenMP] Rewrite min/max with more than 2 arguments Given an atomic operation `w = max(w, x1, x2, ...)` rewrite it as `w = max(w, max(x1, x2, ...))`. This will avoid unnecessary non-atomic comparisons inside of the atomic operation (min/max are expanded inline). In particular, if some of the x_i's are optional dummy parameters in the containing function, this will avoid any presence tests within the atomic operation. 
Fixes https://github.com/llvm/llvm-project/issues/144838 --- flang/lib/Lower/OpenMP/Atomic.cpp | 279 +++++++++++++++++- flang/test/Lower/OpenMP/atomic-update.f90 | 12 +- .../Lower/OpenMP/max-optional-parameters.f90 | 24 ++ 3 files changed, 302 insertions(+), 13 deletions(-) create mode 100644 flang/test/Lower/OpenMP/max-optional-parameters.f90 diff --git a/flang/lib/Lower/OpenMP/Atomic.cpp b/flang/lib/Lower/OpenMP/Atomic.cpp index 33a743f8f9dda..b7a223eff80c5 100644 --- a/flang/lib/Lower/OpenMP/Atomic.cpp +++ b/flang/lib/Lower/OpenMP/Atomic.cpp @@ -11,6 +11,8 @@ #include "flang/Evaluate/expression.h" #include "flang/Evaluate/fold.h" #include "flang/Evaluate/tools.h" +#include "flang/Evaluate/traverse.h" +#include "flang/Evaluate/type.h" #include "flang/Lower/AbstractConverter.h" #include "flang/Lower/PFTBuilder.h" #include "flang/Lower/StatementContext.h" @@ -41,6 +43,178 @@ namespace omp { using namespace Fortran::lower::omp; } +namespace { +// An example of a type that can be used to get the return value from +// the visitor: +// visitor(type_identity) -> result_type +using SomeArgType = evaluate::Type; + +struct GetProc + : public evaluate::Traverse { + using Result = const evaluate::ProcedureDesignator *; + using Base = evaluate::Traverse; + GetProc() : Base(*this) {} + + using Base::operator(); + + static Result Default() { return nullptr; } + + Result operator()(const evaluate::ProcedureDesignator &p) const { return &p; } + static Result Combine(Result a, Result b) { return a != nullptr ? 
a : b; } +}; + +struct WithType { + WithType(const evaluate::DynamicType &t) : type(t) { + assert(type.category() != common::TypeCategory::Derived && + "Type cannot be a derived type"); + } + + template // + auto visit(VisitorTy &&visitor) const + -> std::invoke_result_t { + switch (type.category()) { + case common::TypeCategory::Integer: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Unsigned: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Real: + switch (type.kind()) { + case 2: + return visitor(llvm::type_identity>{}); + case 3: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 10: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Complex: + switch (type.kind()) { + case 2: + return visitor(llvm::type_identity>{}); + case 3: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 10: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Logical: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return 
visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Character: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Derived: + break; + } + llvm_unreachable("Unhandled type"); + } + + const evaluate::DynamicType &type; + +private: + // Shorter names. + static constexpr auto Character = common::TypeCategory::Character; + static constexpr auto Complex = common::TypeCategory::Complex; + static constexpr auto Derived = common::TypeCategory::Derived; + static constexpr auto Integer = common::TypeCategory::Integer; + static constexpr auto Logical = common::TypeCategory::Logical; + static constexpr auto Real = common::TypeCategory::Real; + static constexpr auto Unsigned = common::TypeCategory::Unsigned; +}; + +template > +U AsRvalue(T &t) { + U copy{t}; + return std::move(copy); +} + +template +T &&AsRvalue(T &&t) { + return std::move(t); +} + +struct ArgumentReplacer + : public evaluate::Traverse { + using Base = evaluate::Traverse; + using Result = bool; + + Result Default() const { return false; } + + ArgumentReplacer(evaluate::ActualArguments &&newArgs) + : Base(*this), args_(std::move(newArgs)) {} + + using Base::operator(); + + template + Result operator()(const evaluate::FunctionRef &x) { + assert(!done_); + auto &mut = const_cast &>(x); + mut.arguments() = args_; + done_ = true; + return true; + } + + Result Combine(Result &&a, Result &&b) { return a || b; } + +private: + bool done_{false}; + evaluate::ActualArguments &&args_; +}; +} // namespace + [[maybe_unused]] static void dumpAtomicAnalysis(const parser::OpenMPAtomicConstruct::Analysis &analysis) { auto whatStr = [](int k) { @@ -237,6 +411,85 @@ makeMemOrderAttr(lower::AbstractConverter &converter, return nullptr; } +static bool replaceArgs(semantics::SomeExpr &expr, + evaluate::ActualArguments &&newArgs) { + return 
ArgumentReplacer(std::move(newArgs))(expr); +} + +static semantics::SomeExpr makeCall(const evaluate::DynamicType &type, + const evaluate::ProcedureDesignator &proc, + const evaluate::ActualArguments &args) { + return WithType(type).visit([&](auto &&s) -> semantics::SomeExpr { + using Type = typename llvm::remove_cvref_t::type; + return evaluate::AsGenericExpr( + evaluate::FunctionRef(AsRvalue(proc), AsRvalue(args))); + }); +} + +static const evaluate::ProcedureDesignator & +getProcedureDesignator(const semantics::SomeExpr &call) { + const evaluate::ProcedureDesignator *proc = GetProc{}(call); + assert(proc && "Call has no procedure designator"); + return *proc; +} + +static semantics::SomeExpr // +genReducedMinMax(const semantics::SomeExpr &orig, + const semantics::SomeExpr *atomArg, + const std::vector &args) { + // Take a list of arguments to a min/max operation, e.g. [a0, a1, ...] + // One of the a_i's, say a_t, must be atom (or a convert of atom). + // Generate tmp = min/max(a0, a1, ... [except a_t]). Then generate + // call = min/max(a_t, tmp). + // Return "call". + + // The min/max intrinsics have 2 mandatory arguments, the rest is optional. + // Make sure that the "tmp = min/max(...)" doesn't promote an optional + // argument to a non-optional position. This could happen if a_t is at + // position 0 or 1. + if (args.size() <= 2) + return orig; + + evaluate::ActualArguments nonAtoms; + + auto AsActual = [](const semantics::SomeExpr &x) { + semantics::SomeExpr copy = x; + return evaluate::ActualArgument(std::move(copy)); + }; + // Semantic checks guarantee that the "atom" shows exactly once in the + // argument list (with potential conversions around it). + // For the first two (non-optional) arguments, if "atom" is among them, + // replace it with another occurrence of the other non-optional argument. + if (atomArg == &args[0]) { + // (atom, x, y...) -> (x, x, y...) 
+ nonAtoms.push_back(AsActual(args[1])); + nonAtoms.push_back(AsActual(args[1])); + } else if (atomArg == &args[1]) { + // (x, atom, y...) -> (x, x, y...) + nonAtoms.push_back(AsActual(args[0])); + nonAtoms.push_back(AsActual(args[0])); + } else { + // (x, y, z...) -> unchanged + nonAtoms.push_back(AsActual(args[0])); + nonAtoms.push_back(AsActual(args[1])); + } + + // The rest of arguments are optional, so we can just skip "atom". + for (size_t i = 2, e = args.size(); i != e; ++i) { + if (atomArg != &args[i]) + nonAtoms.push_back(AsActual(args[i])); + } + + // The type of the intermediate min/max is the same as the type of its + // arguments, which may be different from the type of the original + // expression. The original expression may have additional converts. + auto tmp = + makeCall(*atomArg->GetType(), getProcedureDesignator(orig), nonAtoms); + semantics::SomeExpr call = orig; + replaceArgs(call, {AsActual(*atomArg), AsActual(tmp)}); + return call; +} + static mlir::Operation * // genAtomicRead(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, mlir::Location loc, @@ -350,10 +603,28 @@ genAtomicUpdate(lower::AbstractConverter &converter, mlir::Type atomType = fir::unwrapRefType(atomAddr.getType()); // This must exist by now. 
- semantics::SomeExpr input = *evaluate::GetConvertInput(assign.rhs); - std::vector args = - evaluate::GetTopLevelOperation(input).second; + semantics::SomeExpr rhs = assign.rhs; + semantics::SomeExpr input = *evaluate::GetConvertInput(rhs); + auto [opcode, args] = evaluate::GetTopLevelOperation(input); assert(!args.empty() && "Update operation without arguments"); + + const semantics::SomeExpr *atomArg = [&]() { + for (const semantics::SomeExpr &e : args) { + if (evaluate::IsSameOrConvertOf(e, atom)) + return &e; + } + llvm_unreachable("Atomic variable not in argument list"); + }(); + + if (opcode == evaluate::operation::Operator::Min || + opcode == evaluate::operation::Operator::Max) { + // Min and max operations are expanded inline, so reduce them to + // operations with exactly two (non-optional) arguments. + rhs = genReducedMinMax(rhs, atomArg, args); + input = *evaluate::GetConvertInput(rhs); + std::tie(opcode, args) = evaluate::GetTopLevelOperation(input); + atomArg = nullptr; // No longer valid. 
+ } for (auto &arg : args) { if (!evaluate::IsSameOrConvertOf(arg, atom)) { mlir::Value val = fir::getBase(converter.genExprValue(arg, naCtx, &loc)); @@ -372,7 +643,7 @@ genAtomicUpdate(lower::AbstractConverter &converter, converter.overrideExprValues(&overrides); mlir::Value updated = - fir::getBase(converter.genExprValue(assign.rhs, stmtCtx, &loc)); + fir::getBase(converter.genExprValue(rhs, stmtCtx, &loc)); mlir::Value converted = builder.createConvert(loc, atomType, updated); builder.create(loc, converted); converter.resetExprOverrides(); diff --git a/flang/test/Lower/OpenMP/atomic-update.f90 b/flang/test/Lower/OpenMP/atomic-update.f90 index 3f840acefa6e8..f88bbea6fca85 100644 --- a/flang/test/Lower/OpenMP/atomic-update.f90 +++ b/flang/test/Lower/OpenMP/atomic-update.f90 @@ -107,8 +107,6 @@ program OmpAtomicUpdate !CHECK: omp.atomic.update memory_order(relaxed) %[[VAL_Y_DECLARE]]#0 : !fir.ref { !CHECK: ^bb0(%[[ARG:.*]]: i32): !CHECK: {{.*}} = arith.cmpi sgt, %[[ARG]], {{.*}} : i32 -!CHECK: {{.*}} = arith.select {{.*}}, %[[ARG]], {{.*}} : i32 -!CHECK: {{.*}} = arith.cmpi sgt, {{.*}} !CHECK: %[[TEMP:.*]] = arith.select {{.*}} : i32 !CHECK: omp.yield(%[[TEMP]] : i32) !CHECK: } @@ -177,13 +175,9 @@ program OmpAtomicUpdate !CHECK: %[[VAL_Z_LOADED:.*]] = fir.load %[[VAL_Z_DECLARE]]#0 : !fir.ref !CHECK: omp.atomic.update %[[VAL_W_DECLARE]]#0 : !fir.ref { !CHECK: ^bb0(%[[ARG_W:.*]]: i32): -!CHECK: %[[WX_CMP:.*]] = arith.cmpi sgt, %[[ARG_W]], %[[VAL_X_LOADED]] : i32 -!CHECK: %[[WX_MIN:.*]] = arith.select %[[WX_CMP]], %[[ARG_W]], %[[VAL_X_LOADED]] : i32 -!CHECK: %[[WXY_CMP:.*]] = arith.cmpi sgt, %[[WX_MIN]], %[[VAL_Y_LOADED]] : i32 -!CHECK: %[[WXY_MIN:.*]] = arith.select %[[WXY_CMP]], %[[WX_MIN]], %[[VAL_Y_LOADED]] : i32 -!CHECK: %[[WXYZ_CMP:.*]] = arith.cmpi sgt, %[[WXY_MIN]], %[[VAL_Z_LOADED]] : i32 -!CHECK: %[[WXYZ_MIN:.*]] = arith.select %[[WXYZ_CMP]], %[[WXY_MIN]], %[[VAL_Z_LOADED]] : i32 -!CHECK: omp.yield(%[[WXYZ_MIN]] : i32) +!CHECK: %[[W_CMP:.*]] = arith.cmpi 
sgt, %[[ARG_W]], {{.*}} : i32 +!CHECK: %[[WXYZ_MAX:.*]] = arith.select %[[W_CMP]], %[[ARG_W]], {{.*}} : i32 +!CHECK: omp.yield(%[[WXYZ_MAX]] : i32) !CHECK: } !$omp atomic update w = max(w,x,y,z) diff --git a/flang/test/Lower/OpenMP/max-optional-parameters.f90 b/flang/test/Lower/OpenMP/max-optional-parameters.f90 new file mode 100644 index 0000000000000..2bb6b110cb0ef --- /dev/null +++ b/flang/test/Lower/OpenMP/max-optional-parameters.f90 @@ -0,0 +1,24 @@ +!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 %s -o - | FileCheck %s + +! Check that the presence tests are done outside of the atomic update +! construct. + +!CHECK: %[[IS_PRESENT:[a-z0-9]+]] = fir.is_present +!CHECK: %[[IF_VAL:[a-z0-9]+]] = fir.if %[[IS_PRESENT]] -> (f32) { +!CHECK: fir.result {{.*}} : f32 +!CHECK: } else { +!CHECK: fir.result {{.*}} : f32 +!CHECK: } +!CHECK: omp.atomic.update {{.*}} : !fir.ref { +!CHECK: ^bb0(%[[ARG:[a-z0-9]+]]: f32): +!CHECK: %[[V10:[a-z0-9]+]] = arith.cmpf ogt, %[[ARG]], %[[IF_VAL]] +!CHECK: %[[V11:[a-z0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[IF_VAL]] +!CHECK: omp.yield(%[[V11]] : f32) +!CHECK: } + +subroutine f00(a, x, y) + real :: a + real, optional :: x, y + !$omp atomic update + a = max(x, a, y) +end >From 4302ed81021028baec7427028562802a516ea511 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 1 Jul 2025 07:38:45 -0500 Subject: [PATCH 2/5] Address review comments --- flang/lib/Lower/OpenMP/Atomic.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/flang/lib/Lower/OpenMP/Atomic.cpp b/flang/lib/Lower/OpenMP/Atomic.cpp index b7a223eff80c5..2ab91b239a3cc 100644 --- a/flang/lib/Lower/OpenMP/Atomic.cpp +++ b/flang/lib/Lower/OpenMP/Atomic.cpp @@ -157,6 +157,7 @@ struct WithType { } break; case common::TypeCategory::Derived: + (void)Derived; break; } llvm_unreachable("Unhandled type"); @@ -438,7 +439,7 @@ genReducedMinMax(const semantics::SomeExpr &orig, const semantics::SomeExpr *atomArg, const std::vector &args) { // Take 
a list of arguments to a min/max operation, e.g. [a0, a1, ...] - // One of the a_i's, say a_t, must be atom (or a convert of atom). + // One of the a_i's, say a_t, must be atomArg. // Generate tmp = min/max(a0, a1, ... [except a_t]). Then generate // call = min/max(a_t, tmp). // Return "call". @@ -608,13 +609,14 @@ genAtomicUpdate(lower::AbstractConverter &converter, auto [opcode, args] = evaluate::GetTopLevelOperation(input); assert(!args.empty() && "Update operation without arguments"); - const semantics::SomeExpr *atomArg = [&]() { + // Pass args as an argument to avoid capturing a structured binding. + const semantics::SomeExpr *atomArg = [&](auto &args) { for (const semantics::SomeExpr &e : args) { if (evaluate::IsSameOrConvertOf(e, atom)) return &e; } llvm_unreachable("Atomic variable not in argument list"); - }(); + }(args); if (opcode == evaluate::operation::Operator::Min || opcode == evaluate::operation::Operator::Max) { >From 16df2e0eb775fce156db6f5fc62509026a0b2bc5 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 1 Jul 2025 08:01:23 -0500 Subject: [PATCH 3/5] Check for non-atomic operations, add min --- .../Lower/OpenMP/max-optional-parameters.f90 | 58 ++++++++++++++++--- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/flang/test/Lower/OpenMP/max-optional-parameters.f90 b/flang/test/Lower/OpenMP/max-optional-parameters.f90 index 2bb6b110cb0ef..57a432eeeecec 100644 --- a/flang/test/Lower/OpenMP/max-optional-parameters.f90 +++ b/flang/test/Lower/OpenMP/max-optional-parameters.f90 @@ -3,16 +3,27 @@ ! Check that the presence tests are done outside of the atomic update ! construct. 
-!CHECK: %[[IS_PRESENT:[a-z0-9]+]] = fir.is_present -!CHECK: %[[IF_VAL:[a-z0-9]+]] = fir.if %[[IS_PRESENT]] -> (f32) { -!CHECK: fir.result {{.*}} : f32 +!CHECK-LABEL: func.func @_QPf00 +!CHECK: %[[VAL_A:[0-9]+]]:2 = hlfir.declare %arg0 dummy_scope %0 +!CHECK: %[[VAL_X:[0-9]+]]:2 = hlfir.declare %arg1 dummy_scope %0 +!CHECK: %[[VAL_Y:[0-9]+]]:2 = hlfir.declare %arg2 dummy_scope %0 +!CHECK: %[[V4:[0-9]+]] = fir.load %[[VAL_X]]#0 : !fir.ref +!CHECK: %[[V5:[0-9]+]] = fir.load %[[VAL_X]]#0 : !fir.ref +!CHECK: %[[V6:[0-9]+]] = fir.is_present %[[VAL_Y]]#0 : (!fir.ref) -> i1 +!CHECK: %[[V7:[0-9]+]] = arith.cmpf ogt, %[[V4]], %[[V5]] fastmath : f32 +!CHECK: %[[V8:[0-9]+]] = arith.select %[[V7]], %[[V4]], %[[V5]] : f32 +!CHECK: %[[V9:[0-9]+]] = fir.if %[[V6]] -> (f32) { +!CHECK: %[[V10:[0-9]+]] = fir.load %[[VAL_Y]]#0 : !fir.ref +!CHECK: %[[V11:[0-9]+]] = arith.cmpf ogt, %[[V8]], %[[V10]] fastmath : f32 +!CHECK: %[[V12:[0-9]+]] = arith.select %[[V11]], %[[V8]], %[[V10]] : f32 +!CHECK: fir.result %[[V12]] : f32 !CHECK: } else { -!CHECK: fir.result {{.*}} : f32 +!CHECK: fir.result %[[V8]] : f32 !CHECK: } -!CHECK: omp.atomic.update {{.*}} : !fir.ref { +!CHECK: omp.atomic.update %[[VAL_A]]#0 : !fir.ref { !CHECK: ^bb0(%[[ARG:[a-z0-9]+]]: f32): -!CHECK: %[[V10:[a-z0-9]+]] = arith.cmpf ogt, %[[ARG]], %[[IF_VAL]] -!CHECK: %[[V11:[a-z0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[IF_VAL]] +!CHECK: %[[V10:[0-9]+]] = arith.cmpf ogt, %[[ARG]], %[[V9]] fastmath : f32 +!CHECK: %[[V11:[0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[V9]] : f32 !CHECK: omp.yield(%[[V11]] : f32) !CHECK: } @@ -22,3 +33,36 @@ subroutine f00(a, x, y) !$omp atomic update a = max(x, a, y) end + + +!CHECK-LABEL: func.func @_QPf01 +!CHECK: %[[VAL_A:[0-9]+]]:2 = hlfir.declare %arg0 dummy_scope %0 +!CHECK: %[[VAL_X:[0-9]+]]:2 = hlfir.declare %arg1 dummy_scope %0 +!CHECK: %[[VAL_Y:[0-9]+]]:2 = hlfir.declare %arg2 dummy_scope %0 +!CHECK: %[[V4:[0-9]+]] = fir.load %[[VAL_X]]#0 : !fir.ref +!CHECK: %[[V5:[0-9]+]] = fir.load 
%[[VAL_X]]#0 : !fir.ref +!CHECK: %[[V6:[0-9]+]] = fir.is_present %[[VAL_Y]]#0 : (!fir.ref) -> i1 +!CHECK: %[[V7:[0-9]+]] = arith.cmpi slt, %[[V4]], %[[V5]] : i32 +!CHECK: %[[V8:[0-9]+]] = arith.select %[[V7]], %[[V4]], %[[V5]] : i32 +!CHECK: %[[V9:[0-9]+]] = fir.if %[[V6]] -> (i32) { +!CHECK: %[[V10:[0-9]+]] = fir.load %[[VAL_Y]]#0 : !fir.ref +!CHECK: %[[V11:[0-9]+]] = arith.cmpi slt, %[[V8]], %[[V10]] : i32 +!CHECK: %[[V12:[0-9]+]] = arith.select %[[V11]], %[[V8]], %[[V10]] : i32 +!CHECK: fir.result %[[V12]] : i32 +!CHECK: } else { +!CHECK: fir.result %[[V8]] : i32 +!CHECK: } +!CHECK: omp.atomic.update %[[VAL_A]]#0 : !fir.ref { +!CHECK: ^bb0(%[[ARG:[a-z0-9]+]]: i32): +!CHECK: %[[V10:[0-9]+]] = arith.cmpi slt, %[[ARG]], %[[V9]] : i32 +!CHECK: %[[V11:[0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[V9]] : i32 +!CHECK: omp.yield(%[[V11]] : i32) +!CHECK: } + +subroutine f01(a, x, y) + integer :: a + integer, optional :: x, y + !$omp atomic update + a = min(x, a, y) +end + >From 6c45f7c32a4919ee950703377fa1b5c8f0257fc2 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 1 Jul 2025 09:13:35 -0500 Subject: [PATCH 4/5] Fix test --- flang/test/Lower/OpenMP/max-optional-parameters.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flang/test/Lower/OpenMP/max-optional-parameters.f90 b/flang/test/Lower/OpenMP/max-optional-parameters.f90 index 57a432eeeecec..418a3cad8cdaf 100644 --- a/flang/test/Lower/OpenMP/max-optional-parameters.f90 +++ b/flang/test/Lower/OpenMP/max-optional-parameters.f90 @@ -20,7 +20,7 @@ !CHECK: } else { !CHECK: fir.result %[[V8]] : f32 !CHECK: } -!CHECK: omp.atomic.update %[[VAL_A]]#0 : !fir.ref { +!CHECK: omp.atomic.update memory_order(relaxed) %[[VAL_A]]#0 : !fir.ref { !CHECK: ^bb0(%[[ARG:[a-z0-9]+]]: f32): !CHECK: %[[V10:[0-9]+]] = arith.cmpf ogt, %[[ARG]], %[[V9]] fastmath : f32 !CHECK: %[[V11:[0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[V9]] : f32 @@ -52,7 +52,7 @@ subroutine f00(a, x, y) !CHECK: } else { !CHECK: 
fir.result %[[V8]] : i32 !CHECK: } -!CHECK: omp.atomic.update %[[VAL_A]]#0 : !fir.ref { +!CHECK: omp.atomic.update memory_order(relaxed) %[[VAL_A]]#0 : !fir.ref { !CHECK: ^bb0(%[[ARG:[a-z0-9]+]]: i32): !CHECK: %[[V10:[0-9]+]] = arith.cmpi slt, %[[ARG]], %[[V9]] : i32 !CHECK: %[[V11:[0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[V9]] : i32 >From 3aa831fe921cb2ff40d24206dc2d98909f681e94 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 1 Jul 2025 09:13:59 -0500 Subject: [PATCH 5/5] Rename test --- ...max-optional-parameters.f90 => minmax-optional-parameters.f90} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename flang/test/Lower/OpenMP/{max-optional-parameters.f90 => minmax-optional-parameters.f90} (100%) diff --git a/flang/test/Lower/OpenMP/max-optional-parameters.f90 b/flang/test/Lower/OpenMP/minmax-optional-parameters.f90 similarity index 100% rename from flang/test/Lower/OpenMP/max-optional-parameters.f90 rename to flang/test/Lower/OpenMP/minmax-optional-parameters.f90 From flang-commits at lists.llvm.org Tue Jul 1 07:16:12 2025 From: flang-commits at lists.llvm.org (Tom Eccles via flang-commits) Date: Tue, 01 Jul 2025 07:16:12 -0700 (PDT) Subject: [flang-commits] [flang] [flang][OpenMP] Rewrite min/max with more than 2 arguments (PR #146423) In-Reply-To: Message-ID: <6863edac.170a0220.382880.d311@mx.google.com> https://github.com/tblah approved this pull request. LGTM if Kiran is happy https://github.com/llvm/llvm-project/pull/146423 From flang-commits at lists.llvm.org Tue Jul 1 07:52:17 2025 From: flang-commits at lists.llvm.org (Kiran Chandramohan via flang-commits) Date: Tue, 01 Jul 2025 07:52:17 -0700 (PDT) Subject: [flang-commits] [flang] [flang][OpenMP] Rewrite min/max with more than 2 arguments (PR #146423) In-Reply-To: Message-ID: <6863f621.050a0220.23a56e.8a20@mx.google.com> kiranchandramohan wrote: > Eventually both of these will be done in semantics. OK. 
> Meanwhile, there is another issue, this time with reassociation: the expression w = w .and. x .and. y, which is parsed as w = (w .and. x) .and. y, is rejected since w is not a top-level argument. There is https://github.com/llvm/llvm-project/issues/138748 assigned to me. If you are working on this, please feel free to assign to yourself. Thanks for the explanations. https://github.com/llvm/llvm-project/pull/146423 From flang-commits at lists.llvm.org Tue Jul 1 07:52:30 2025 From: flang-commits at lists.llvm.org (Kiran Chandramohan via flang-commits) Date: Tue, 01 Jul 2025 07:52:30 -0700 (PDT) Subject: [flang-commits] [flang] [flang][OpenMP] Rewrite min/max with more than 2 arguments (PR #146423) In-Reply-To: Message-ID: <6863f62e.630a0220.3835e0.fd8b@mx.google.com> https://github.com/kiranchandramohan approved this pull request. LG. https://github.com/llvm/llvm-project/pull/146423 From flang-commits at lists.llvm.org Tue Jul 1 07:55:02 2025 From: flang-commits at lists.llvm.org (via flang-commits) Date: Tue, 01 Jul 2025 07:55:02 -0700 (PDT) Subject: [flang-commits] [flang] 86077c4 - [flang][OpenMP] Rewrite min/max with more than 2 arguments (#146423) Message-ID: <6863f6c6.170a0220.2821cb.e75d@mx.google.com> Author: Krzysztof Parzyszek Date: 2025-07-01T09:54:58-05:00 New Revision: 86077c41a7899fb3a3ce4654bdb373e7cd954f49 URL: https://github.com/llvm/llvm-project/commit/86077c41a7899fb3a3ce4654bdb373e7cd954f49 DIFF: https://github.com/llvm/llvm-project/commit/86077c41a7899fb3a3ce4654bdb373e7cd954f49.diff LOG: [flang][OpenMP] Rewrite min/max with more than 2 arguments (#146423) Given an atomic operation `w = max(w, x1, x2, ...)` rewrite it as `w = max(w, max(x1, x2, ...))`. This will avoid unnecessary non-atomic comparisons inside of the atomic operation (min/max are expanded inline). In particular, if some of the x_i's are optional dummy parameters in the containing function, this will avoid any presence tests within the atomic operation. 
Fixes https://github.com/llvm/llvm-project/issues/144838 Added: flang/test/Lower/OpenMP/minmax-optional-parameters.f90 Modified: flang/lib/Lower/OpenMP/Atomic.cpp flang/test/Lower/OpenMP/atomic-update.f90 Removed: ################################################################################ diff --git a/flang/lib/Lower/OpenMP/Atomic.cpp b/flang/lib/Lower/OpenMP/Atomic.cpp index 33a743f8f9dda..2ab91b239a3cc 100644 --- a/flang/lib/Lower/OpenMP/Atomic.cpp +++ b/flang/lib/Lower/OpenMP/Atomic.cpp @@ -11,6 +11,8 @@ #include "flang/Evaluate/expression.h" #include "flang/Evaluate/fold.h" #include "flang/Evaluate/tools.h" +#include "flang/Evaluate/traverse.h" +#include "flang/Evaluate/type.h" #include "flang/Lower/AbstractConverter.h" #include "flang/Lower/PFTBuilder.h" #include "flang/Lower/StatementContext.h" @@ -41,6 +43,179 @@ namespace omp { using namespace Fortran::lower::omp; } +namespace { +// An example of a type that can be used to get the return value from +// the visitor: +// visitor(type_identity) -> result_type +using SomeArgType = evaluate::Type; + +struct GetProc + : public evaluate::Traverse { + using Result = const evaluate::ProcedureDesignator *; + using Base = evaluate::Traverse; + GetProc() : Base(*this) {} + + using Base::operator(); + + static Result Default() { return nullptr; } + + Result operator()(const evaluate::ProcedureDesignator &p) const { return &p; } + static Result Combine(Result a, Result b) { return a != nullptr ? 
a : b; } +}; + +struct WithType { + WithType(const evaluate::DynamicType &t) : type(t) { + assert(type.category() != common::TypeCategory::Derived && + "Type cannot be a derived type"); + } + + template // + auto visit(VisitorTy &&visitor) const + -> std::invoke_result_t { + switch (type.category()) { + case common::TypeCategory::Integer: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Unsigned: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Real: + switch (type.kind()) { + case 2: + return visitor(llvm::type_identity>{}); + case 3: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 10: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Complex: + switch (type.kind()) { + case 2: + return visitor(llvm::type_identity>{}); + case 3: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return visitor(llvm::type_identity>{}); + case 10: + return visitor(llvm::type_identity>{}); + case 16: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Logical: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + case 8: + return 
visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Character: + switch (type.kind()) { + case 1: + return visitor(llvm::type_identity>{}); + case 2: + return visitor(llvm::type_identity>{}); + case 4: + return visitor(llvm::type_identity>{}); + } + break; + case common::TypeCategory::Derived: + (void)Derived; + break; + } + llvm_unreachable("Unhandled type"); + } + + const evaluate::DynamicType &type; + +private: + // Shorter names. + static constexpr auto Character = common::TypeCategory::Character; + static constexpr auto Complex = common::TypeCategory::Complex; + static constexpr auto Derived = common::TypeCategory::Derived; + static constexpr auto Integer = common::TypeCategory::Integer; + static constexpr auto Logical = common::TypeCategory::Logical; + static constexpr auto Real = common::TypeCategory::Real; + static constexpr auto Unsigned = common::TypeCategory::Unsigned; +}; + +template > +U AsRvalue(T &t) { + U copy{t}; + return std::move(copy); +} + +template +T &&AsRvalue(T &&t) { + return std::move(t); +} + +struct ArgumentReplacer + : public evaluate::Traverse { + using Base = evaluate::Traverse; + using Result = bool; + + Result Default() const { return false; } + + ArgumentReplacer(evaluate::ActualArguments &&newArgs) + : Base(*this), args_(std::move(newArgs)) {} + + using Base::operator(); + + template + Result operator()(const evaluate::FunctionRef &x) { + assert(!done_); + auto &mut = const_cast &>(x); + mut.arguments() = args_; + done_ = true; + return true; + } + + Result Combine(Result &&a, Result &&b) { return a || b; } + +private: + bool done_{false}; + evaluate::ActualArguments &&args_; +}; +} // namespace + [[maybe_unused]] static void dumpAtomicAnalysis(const parser::OpenMPAtomicConstruct::Analysis &analysis) { auto whatStr = [](int k) { @@ -237,6 +412,85 @@ makeMemOrderAttr(lower::AbstractConverter &converter, return nullptr; } +static bool replaceArgs(semantics::SomeExpr &expr, + evaluate::ActualArguments 
&&newArgs) { + return ArgumentReplacer(std::move(newArgs))(expr); +} + +static semantics::SomeExpr makeCall(const evaluate::DynamicType &type, + const evaluate::ProcedureDesignator &proc, + const evaluate::ActualArguments &args) { + return WithType(type).visit([&](auto &&s) -> semantics::SomeExpr { + using Type = typename llvm::remove_cvref_t::type; + return evaluate::AsGenericExpr( + evaluate::FunctionRef(AsRvalue(proc), AsRvalue(args))); + }); +} + +static const evaluate::ProcedureDesignator & +getProcedureDesignator(const semantics::SomeExpr &call) { + const evaluate::ProcedureDesignator *proc = GetProc{}(call); + assert(proc && "Call has no procedure designator"); + return *proc; +} + +static semantics::SomeExpr // +genReducedMinMax(const semantics::SomeExpr &orig, + const semantics::SomeExpr *atomArg, + const std::vector &args) { + // Take a list of arguments to a min/max operation, e.g. [a0, a1, ...] + // One of the a_i's, say a_t, must be atomArg. + // Generate tmp = min/max(a0, a1, ... [except a_t]). Then generate + // call = min/max(a_t, tmp). + // Return "call". + + // The min/max intrinsics have 2 mandatory arguments, the rest is optional. + // Make sure that the "tmp = min/max(...)" doesn't promote an optional + // argument to a non-optional position. This could happen if a_t is at + // position 0 or 1. + if (args.size() <= 2) + return orig; + + evaluate::ActualArguments nonAtoms; + + auto AsActual = [](const semantics::SomeExpr &x) { + semantics::SomeExpr copy = x; + return evaluate::ActualArgument(std::move(copy)); + }; + // Semantic checks guarantee that the "atom" shows exactly once in the + // argument list (with potential conversions around it). + // For the first two (non-optional) arguments, if "atom" is among them, + // replace it with another occurrence of the other non-optional argument. + if (atomArg == &args[0]) { + // (atom, x, y...) -> (x, x, y...) 
+ nonAtoms.push_back(AsActual(args[1])); + nonAtoms.push_back(AsActual(args[1])); + } else if (atomArg == &args[1]) { + // (x, atom, y...) -> (x, x, y...) + nonAtoms.push_back(AsActual(args[0])); + nonAtoms.push_back(AsActual(args[0])); + } else { + // (x, y, z...) -> unchanged + nonAtoms.push_back(AsActual(args[0])); + nonAtoms.push_back(AsActual(args[1])); + } + + // The rest of arguments are optional, so we can just skip "atom". + for (size_t i = 2, e = args.size(); i != e; ++i) { + if (atomArg != &args[i]) + nonAtoms.push_back(AsActual(args[i])); + } + + // The type of the intermediate min/max is the same as the type of its + // arguments, which may be different from the type of the original + // expression. The original expression may have additional coverts. + auto tmp = + makeCall(*atomArg->GetType(), getProcedureDesignator(orig), nonAtoms); + semantics::SomeExpr call = orig; + replaceArgs(call, {AsActual(*atomArg), AsActual(tmp)}); + return call; +} + static mlir::Operation * // genAtomicRead(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, mlir::Location loc, @@ -350,10 +604,29 @@ genAtomicUpdate(lower::AbstractConverter &converter, mlir::Type atomType = fir::unwrapRefType(atomAddr.getType()); // This must exist by now. - semantics::SomeExpr input = *evaluate::GetConvertInput(assign.rhs); - std::vector args = - evaluate::GetTopLevelOperation(input).second; + semantics::SomeExpr rhs = assign.rhs; + semantics::SomeExpr input = *evaluate::GetConvertInput(rhs); + auto [opcode, args] = evaluate::GetTopLevelOperation(input); assert(!args.empty() && "Update operation without arguments"); + + // Pass args as an argument to avoid capturing a structured binding. 
+ const semantics::SomeExpr *atomArg = [&](auto &args) { + for (const semantics::SomeExpr &e : args) { + if (evaluate::IsSameOrConvertOf(e, atom)) + return &e; + } + llvm_unreachable("Atomic variable not in argument list"); + }(args); + + if (opcode == evaluate::operation::Operator::Min || + opcode == evaluate::operation::Operator::Max) { + // Min and max operations are expanded inline, so reduce them to + // operations with exactly two (non-optional) arguments. + rhs = genReducedMinMax(rhs, atomArg, args); + input = *evaluate::GetConvertInput(rhs); + std::tie(opcode, args) = evaluate::GetTopLevelOperation(input); + atomArg = nullptr; // No longer valid. + } for (auto &arg : args) { if (!evaluate::IsSameOrConvertOf(arg, atom)) { mlir::Value val = fir::getBase(converter.genExprValue(arg, naCtx, &loc)); @@ -372,7 +645,7 @@ genAtomicUpdate(lower::AbstractConverter &converter, converter.overrideExprValues(&overrides); mlir::Value updated = - fir::getBase(converter.genExprValue(assign.rhs, stmtCtx, &loc)); + fir::getBase(converter.genExprValue(rhs, stmtCtx, &loc)); mlir::Value converted = builder.createConvert(loc, atomType, updated); builder.create(loc, converted); converter.resetExprOverrides(); diff --git a/flang/test/Lower/OpenMP/atomic-update.f90 b/flang/test/Lower/OpenMP/atomic-update.f90 index 3f840acefa6e8..f88bbea6fca85 100644 --- a/flang/test/Lower/OpenMP/atomic-update.f90 +++ b/flang/test/Lower/OpenMP/atomic-update.f90 @@ -107,8 +107,6 @@ program OmpAtomicUpdate !CHECK: omp.atomic.update memory_order(relaxed) %[[VAL_Y_DECLARE]]#0 : !fir.ref { !CHECK: ^bb0(%[[ARG:.*]]: i32): !CHECK: {{.*}} = arith.cmpi sgt, %[[ARG]], {{.*}} : i32 -!CHECK: {{.*}} = arith.select {{.*}}, %[[ARG]], {{.*}} : i32 -!CHECK: {{.*}} = arith.cmpi sgt, {{.*}} !CHECK: %[[TEMP:.*]] = arith.select {{.*}} : i32 !CHECK: omp.yield(%[[TEMP]] : i32) !CHECK: } @@ -177,13 +175,9 @@ program OmpAtomicUpdate !CHECK: %[[VAL_Z_LOADED:.*]] = fir.load %[[VAL_Z_DECLARE]]#0 : !fir.ref !CHECK: 
omp.atomic.update %[[VAL_W_DECLARE]]#0 : !fir.ref { !CHECK: ^bb0(%[[ARG_W:.*]]: i32): -!CHECK: %[[WX_CMP:.*]] = arith.cmpi sgt, %[[ARG_W]], %[[VAL_X_LOADED]] : i32 -!CHECK: %[[WX_MIN:.*]] = arith.select %[[WX_CMP]], %[[ARG_W]], %[[VAL_X_LOADED]] : i32 -!CHECK: %[[WXY_CMP:.*]] = arith.cmpi sgt, %[[WX_MIN]], %[[VAL_Y_LOADED]] : i32 -!CHECK: %[[WXY_MIN:.*]] = arith.select %[[WXY_CMP]], %[[WX_MIN]], %[[VAL_Y_LOADED]] : i32 -!CHECK: %[[WXYZ_CMP:.*]] = arith.cmpi sgt, %[[WXY_MIN]], %[[VAL_Z_LOADED]] : i32 -!CHECK: %[[WXYZ_MIN:.*]] = arith.select %[[WXYZ_CMP]], %[[WXY_MIN]], %[[VAL_Z_LOADED]] : i32 -!CHECK: omp.yield(%[[WXYZ_MIN]] : i32) +!CHECK: %[[W_CMP:.*]] = arith.cmpi sgt, %[[ARG_W]], {{.*}} : i32 +!CHECK: %[[WXYZ_MAX:.*]] = arith.select %[[W_CMP]], %[[ARG_W]], {{.*}} : i32 +!CHECK: omp.yield(%[[WXYZ_MAX]] : i32) !CHECK: } !$omp atomic update w = max(w,x,y,z) diff --git a/flang/test/Lower/OpenMP/minmax-optional-parameters.f90 b/flang/test/Lower/OpenMP/minmax-optional-parameters.f90 new file mode 100644 index 0000000000000..418a3cad8cdaf --- /dev/null +++ b/flang/test/Lower/OpenMP/minmax-optional-parameters.f90 @@ -0,0 +1,68 @@ +!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 %s -o - | FileCheck %s + +! Check that the presence tests are done outside of the atomic update +! construct. 
+ +!CHECK-LABEL: func.func @_QPf00 +!CHECK: %[[VAL_A:[0-9]+]]:2 = hlfir.declare %arg0 dummy_scope %0 +!CHECK: %[[VAL_X:[0-9]+]]:2 = hlfir.declare %arg1 dummy_scope %0 +!CHECK: %[[VAL_Y:[0-9]+]]:2 = hlfir.declare %arg2 dummy_scope %0 +!CHECK: %[[V4:[0-9]+]] = fir.load %[[VAL_X]]#0 : !fir.ref +!CHECK: %[[V5:[0-9]+]] = fir.load %[[VAL_X]]#0 : !fir.ref +!CHECK: %[[V6:[0-9]+]] = fir.is_present %[[VAL_Y]]#0 : (!fir.ref) -> i1 +!CHECK: %[[V7:[0-9]+]] = arith.cmpf ogt, %[[V4]], %[[V5]] fastmath : f32 +!CHECK: %[[V8:[0-9]+]] = arith.select %[[V7]], %[[V4]], %[[V5]] : f32 +!CHECK: %[[V9:[0-9]+]] = fir.if %[[V6]] -> (f32) { +!CHECK: %[[V10:[0-9]+]] = fir.load %[[VAL_Y]]#0 : !fir.ref +!CHECK: %[[V11:[0-9]+]] = arith.cmpf ogt, %[[V8]], %[[V10]] fastmath : f32 +!CHECK: %[[V12:[0-9]+]] = arith.select %[[V11]], %[[V8]], %[[V10]] : f32 +!CHECK: fir.result %[[V12]] : f32 +!CHECK: } else { +!CHECK: fir.result %[[V8]] : f32 +!CHECK: } +!CHECK: omp.atomic.update memory_order(relaxed) %[[VAL_A]]#0 : !fir.ref { +!CHECK: ^bb0(%[[ARG:[a-z0-9]+]]: f32): +!CHECK: %[[V10:[0-9]+]] = arith.cmpf ogt, %[[ARG]], %[[V9]] fastmath : f32 +!CHECK: %[[V11:[0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[V9]] : f32 +!CHECK: omp.yield(%[[V11]] : f32) +!CHECK: } + +subroutine f00(a, x, y) + real :: a + real, optional :: x, y + !$omp atomic update + a = max(x, a, y) +end + + +!CHECK-LABEL: func.func @_QPf01 +!CHECK: %[[VAL_A:[0-9]+]]:2 = hlfir.declare %arg0 dummy_scope %0 +!CHECK: %[[VAL_X:[0-9]+]]:2 = hlfir.declare %arg1 dummy_scope %0 +!CHECK: %[[VAL_Y:[0-9]+]]:2 = hlfir.declare %arg2 dummy_scope %0 +!CHECK: %[[V4:[0-9]+]] = fir.load %[[VAL_X]]#0 : !fir.ref +!CHECK: %[[V5:[0-9]+]] = fir.load %[[VAL_X]]#0 : !fir.ref +!CHECK: %[[V6:[0-9]+]] = fir.is_present %[[VAL_Y]]#0 : (!fir.ref) -> i1 +!CHECK: %[[V7:[0-9]+]] = arith.cmpi slt, %[[V4]], %[[V5]] : i32 +!CHECK: %[[V8:[0-9]+]] = arith.select %[[V7]], %[[V4]], %[[V5]] : i32 +!CHECK: %[[V9:[0-9]+]] = fir.if %[[V6]] -> (i32) { +!CHECK: %[[V10:[0-9]+]] = fir.load 
%[[VAL_Y]]#0 : !fir.ref +!CHECK: %[[V11:[0-9]+]] = arith.cmpi slt, %[[V8]], %[[V10]] : i32 +!CHECK: %[[V12:[0-9]+]] = arith.select %[[V11]], %[[V8]], %[[V10]] : i32 +!CHECK: fir.result %[[V12]] : i32 +!CHECK: } else { +!CHECK: fir.result %[[V8]] : i32 +!CHECK: } +!CHECK: omp.atomic.update memory_order(relaxed) %[[VAL_A]]#0 : !fir.ref { +!CHECK: ^bb0(%[[ARG:[a-z0-9]+]]: i32): +!CHECK: %[[V10:[0-9]+]] = arith.cmpi slt, %[[ARG]], %[[V9]] : i32 +!CHECK: %[[V11:[0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[V9]] : i32 +!CHECK: omp.yield(%[[V11]] : i32) +!CHECK: } + +subroutine f01(a, x, y) + integer :: a + integer, optional :: x, y + !$omp atomic update + a = min(x, a, y) +end + From flang-commits at lists.llvm.org Tue Jul 1 07:55:07 2025 From: flang-commits at lists.llvm.org (Krzysztof Parzyszek via flang-commits) Date: Tue, 01 Jul 2025 07:55:07 -0700 (PDT) Subject: [flang-commits] [flang] [flang][OpenMP] Rewrite min/max with more than 2 arguments (PR #146423) In-Reply-To: Message-ID: <6863f6cb.050a0220.1f3361.aacc@mx.google.com> https://github.com/kparzysz closed https://github.com/llvm/llvm-project/pull/146423 From flang-commits at lists.llvm.org Tue Jul 1 08:01:34 2025 From: flang-commits at lists.llvm.org (Sergio Afonso via flang-commits) Date: Tue, 01 Jul 2025 08:01:34 -0700 (PDT) Subject: [flang-commits] [flang] [OpenMP][Flang] Emit default declare mappers implicitly for derived types (PR #140562) In-Reply-To: Message-ID: <6863f84e.630a0220.1cc7cc.fe2a@mx.google.com> https://github.com/skatrak edited https://github.com/llvm/llvm-project/pull/140562 From flang-commits at lists.llvm.org Tue Jul 1 08:01:34 2025 From: flang-commits at lists.llvm.org (Sergio Afonso via flang-commits) Date: Tue, 01 Jul 2025 08:01:34 -0700 (PDT) Subject: [flang-commits] [flang] [OpenMP][Flang] Emit default declare mappers implicitly for derived types (PR #140562) In-Reply-To: Message-ID: <6863f84e.170a0220.e6823.00ef@mx.google.com> https://github.com/skatrak commented: Thank you Akash 
for this work, and excuse the delay! https://github.com/llvm/llvm-project/pull/140562 From flang-commits at lists.llvm.org Tue Jul 1 08:01:35 2025 From: flang-commits at lists.llvm.org (Sergio Afonso via flang-commits) Date: Tue, 01 Jul 2025 08:01:35 -0700 (PDT) Subject: [flang-commits] [flang] [OpenMP][Flang] Emit default declare mappers implicitly for derived types (PR #140562) In-Reply-To: Message-ID: <6863f84f.170a0220.115564.e706@mx.google.com> ================ @@ -3540,7 +3540,7 @@ WRAPPER_CLASS(OmpLocatorList, std::list); struct OmpMapperSpecifier { // Absent mapper-identifier is equivalent to DEFAULT. TUPLE_CLASS_BOILERPLATE(OmpMapperSpecifier); - std::tuple, TypeSpec, Name> t; + std::tuple t; ---------------- skatrak wrote: Since we haven't figured out a better alternative for this situation, I think we can hopefully get away with making this PFT node owner of the identifier name for now. However, this is a departure from the convention of only storing references to externally allocated strings, so I'd suggest adding a comment here to explain why we're making an exception (i.e. this identifier might be compiler-generated and not part of the source). https://github.com/llvm/llvm-project/pull/140562 From flang-commits at lists.llvm.org Tue Jul 1 08:01:35 2025 From: flang-commits at lists.llvm.org (Sergio Afonso via flang-commits) Date: Tue, 01 Jul 2025 08:01:35 -0700 (PDT) Subject: [flang-commits] [flang] [OpenMP][Flang] Emit default declare mappers implicitly for derived types (PR #140562) In-Reply-To: Message-ID: <6863f84f.050a0220.1d1bb5.bb50@mx.google.com> ================ @@ -1389,8 +1389,28 @@ TYPE_PARSER( TYPE_PARSER(sourced(construct( verbatim("DECLARE TARGET"_tok), Parser{}))) +static OmpMapperSpecifier ConstructOmpMapperSpecifier( ---------------- skatrak wrote: Nit: This name seems to follow other existing similar functions better, IMO. In any case, the first letter should be lowercase. 
```suggestion static OmpMapperSpecifier makeMapperSpecifier( ``` https://github.com/llvm/llvm-project/pull/140562 From flang-commits at lists.llvm.org Tue Jul 1 08:01:35 2025 From: flang-commits at lists.llvm.org (Sergio Afonso via flang-commits) Date: Tue, 01 Jul 2025 08:01:35 -0700 (PDT) Subject: [flang-commits] [flang] [OpenMP][Flang] Emit default declare mappers implicitly for derived types (PR #140562) In-Reply-To: Message-ID: <6863f84f.170a0220.229da7.14f2@mx.google.com> ================ @@ -1,9 +1,11 @@ !RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s +!CHECK: omp.declare_mapper @[[MAPPER1:_QQFmaptype_derived_implicit_allocatablescalar_and_array.omp.default.mapper]] : !fir.type<_QFmaptype_derived_implicit_allocatableTscalar_and_array{real:f32,array:!fir.array<10xi32>,int:i32}> { +!CHECK: omp.declare_mapper @[[MAPPER2:_QQFmaptype_derived_implicitscalar_and_array.omp.default.mapper]] : !fir.type<_QFmaptype_derived_implicitTscalar_and_array{real:f32,array:!fir.array<10xi32>,int:i32}> { ---------------- skatrak wrote: ```suggestion !CHECK: omp.declare_mapper @[[MAPPER2:_QQFmaptype_derived_implicitscalar_and_array\.omp\.default\.mapper]] : !fir.type<_QFmaptype_derived_implicitTscalar_and_array{real:f32,array:!fir.array<10xi32>,int:i32}> { ``` https://github.com/llvm/llvm-project/pull/140562 From flang-commits at lists.llvm.org Tue Jul 1 08:01:35 2025 From: flang-commits at lists.llvm.org (Sergio Afonso via flang-commits) Date: Tue, 01 Jul 2025 08:01:35 -0700 (PDT) Subject: [flang-commits] [flang] [OpenMP][Flang] Emit default declare mappers implicitly for derived types (PR #140562) In-Reply-To: Message-ID: <6863f84f.170a0220.123bb4.d4b7@mx.google.com> ================ @@ -1102,23 +1102,30 @@ void ClauseProcessor::processMapObjects( auto getDefaultMapperID = [&](const omp::Object &object, std::string &mapperIdName) { - if (!mlir::isa( - firOpBuilder.getRegion().getParentOp())) { - const semantics::DerivedTypeSpec *typeSpec = nullptr; - - if 
(object.sym()->owner().IsDerivedType()) - typeSpec = object.sym()->owner().derivedTypeSpec(); - else if (object.sym()->GetType() && - object.sym()->GetType()->category() == - semantics::DeclTypeSpec::TypeDerived) - typeSpec = &object.sym()->GetType()->derivedTypeSpec(); - - if (typeSpec) { - mapperIdName = typeSpec->name().ToString() + ".default"; + const semantics::DerivedTypeSpec *typeSpec = nullptr; + + if (object.sym()->GetType() && object.sym()->GetType()->category() == + semantics::DeclTypeSpec::TypeDerived) + typeSpec = &object.sym()->GetType()->derivedTypeSpec(); + else if (object.sym()->owner().IsDerivedType()) + typeSpec = object.sym()->owner().derivedTypeSpec(); + + if (typeSpec) { + mapperIdName = typeSpec->name().ToString() + ".omp.default.mapper"; + if (auto *sym = converter.getCurrentScope().FindSymbol(mapperIdName)) + mapperIdName = converter.mangleName(mapperIdName, sym->owner()); + else mapperIdName = converter.mangleName(mapperIdName, *typeSpec->GetScope()); - } } + + // Make sure we don't return a mapper to self ---------------- skatrak wrote: ```suggestion // Make sure we don't return a mapper to self. 
``` https://github.com/llvm/llvm-project/pull/140562 From flang-commits at lists.llvm.org Tue Jul 1 08:01:36 2025 From: flang-commits at lists.llvm.org (Sergio Afonso via flang-commits) Date: Tue, 01 Jul 2025 08:01:36 -0700 (PDT) Subject: [flang-commits] [flang] [OpenMP][Flang] Emit default declare mappers implicitly for derived types (PR #140562) In-Reply-To: Message-ID: <6863f850.170a0220.271039.fa4e@mx.google.com> ================ @@ -2348,6 +2348,122 @@ genSingleOp(lower::AbstractConverter &converter, lower::SymMap &symTable, queue, item, clauseOps); } +static mlir::FlatSymbolRefAttr +genImplicitDefaultDeclareMapper(lower::AbstractConverter &converter, + mlir::Location loc, fir::RecordType recordType, + llvm::StringRef mapperNameStr) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + + // Save current insertion point before moving to the module scope to create + // the DeclareMapperOp + mlir::OpBuilder::InsertionGuard guard(firOpBuilder); + + firOpBuilder.setInsertionPointToStart(converter.getModuleOp().getBody()); + auto declMapperOp = firOpBuilder.create( + loc, mapperNameStr, recordType); + auto ®ion = declMapperOp.getRegion(); + firOpBuilder.createBlock(®ion); + auto mapperArg = region.addArgument(firOpBuilder.getRefType(recordType), loc); + + auto declareOp = + firOpBuilder.create(loc, mapperArg, /*uniq_name=*/""); + + const auto genBoundsOps = [&](mlir::Value mapVal, + llvm::SmallVectorImpl &bounds) { + fir::ExtendedValue extVal = + hlfir::translateToExtendedValue(mapVal.getLoc(), firOpBuilder, + hlfir::Entity{mapVal}, + /*contiguousHint=*/true) + .first; + fir::factory::AddrAndBoundsInfo info = fir::factory::getDataOperandBaseAddr( + firOpBuilder, mapVal, /*isOptional=*/false, mapVal.getLoc()); + bounds = fir::factory::genImplicitBoundsOps( + firOpBuilder, info, extVal, + /*dataExvIsAssumedSize=*/false, mapVal.getLoc()); + }; + + // Return a reference to the contents of a derived type with one field. + // Also return the field type. 
+ const auto getFieldRef = + [&](mlir::Value rec, + unsigned index) -> std::tuple { + auto recType = mlir::dyn_cast( ---------------- skatrak wrote: Is `recType` not going to always match the `recordType` argument? This seems quite a complicated implementation if that's the case. `[fieldName, fieldTy]` would match `[memberName, memberType]` of the caller below. Let me know if I'm missing something here. https://github.com/llvm/llvm-project/pull/140562 From flang-commits at lists.llvm.org Tue Jul 1 08:01:36 2025 From: flang-commits at lists.llvm.org (Sergio Afonso via flang-commits) Date: Tue, 01 Jul 2025 08:01:36 -0700 (PDT) Subject: [flang-commits] [flang] [OpenMP][Flang] Emit default declare mappers implicitly for derived types (PR #140562) In-Reply-To: Message-ID: <6863f850.170a0220.36cb53.0bb4@mx.google.com> ================ @@ -171,3 +172,93 @@ subroutine declare_mapper_4 a%num = 40 !$omp end target end subroutine declare_mapper_4 + +!--- omp-declare-mapper-5.f90 +program declare_mapper_5 + implicit none + + type :: mytype + integer :: x, y + end type + + !CHECK: omp.declare_mapper @[[INNER_MAPPER_NAMED:_QQFFuse_innermy_mapper]] : [[MY_TYPE:!fir\.type<_QFTmytype\{x:i32,y:i32\}>]] + !CHECK: omp.declare_mapper @[[INNER_MAPPER_DEFAULT:_QQFFuse_innermytype.omp.default.mapper]] : [[MY_TYPE]] ---------------- skatrak wrote: ```suggestion !CHECK: omp.declare_mapper @[[INNER_MAPPER_DEFAULT:_QQFFuse_innermytype\.omp\.default\.mapper]] : [[MY_TYPE]] ``` https://github.com/llvm/llvm-project/pull/140562 From flang-commits at lists.llvm.org Tue Jul 1 08:01:36 2025 From: flang-commits at lists.llvm.org (Sergio Afonso via flang-commits) Date: Tue, 01 Jul 2025 08:01:36 -0700 (PDT) Subject: [flang-commits] [flang] [OpenMP][Flang] Emit default declare mappers implicitly for derived types (PR #140562) In-Reply-To: Message-ID: <6863f850.170a0220.232821.0e00@mx.google.com> ================ @@ -2348,6 +2348,122 @@ genSingleOp(lower::AbstractConverter &converter, lower::SymMap 
&symTable, queue, item, clauseOps); } +static mlir::FlatSymbolRefAttr +genImplicitDefaultDeclareMapper(lower::AbstractConverter &converter, + mlir::Location loc, fir::RecordType recordType, + llvm::StringRef mapperNameStr) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + + // Save current insertion point before moving to the module scope to create + // the DeclareMapperOp + mlir::OpBuilder::InsertionGuard guard(firOpBuilder); + + firOpBuilder.setInsertionPointToStart(converter.getModuleOp().getBody()); + auto declMapperOp = firOpBuilder.create( + loc, mapperNameStr, recordType); + auto &region = declMapperOp.getRegion(); + firOpBuilder.createBlock(&region); + auto mapperArg = region.addArgument(firOpBuilder.getRefType(recordType), loc); + + auto declareOp = + firOpBuilder.create(loc, mapperArg, /*uniq_name=*/""); + + const auto genBoundsOps = [&](mlir::Value mapVal, + llvm::SmallVectorImpl &bounds) { + fir::ExtendedValue extVal = + hlfir::translateToExtendedValue(mapVal.getLoc(), firOpBuilder, + hlfir::Entity{mapVal}, + /*contiguousHint=*/true) + .first; + fir::factory::AddrAndBoundsInfo info = fir::factory::getDataOperandBaseAddr( + firOpBuilder, mapVal, /*isOptional=*/false, mapVal.getLoc()); + bounds = fir::factory::genImplicitBoundsOps( + firOpBuilder, info, extVal, + /*dataExvIsAssumedSize=*/false, mapVal.getLoc()); + }; + + // Return a reference to the contents of a derived type with one field. + // Also return the field type. 
+ const auto getFieldRef = + [&](mlir::Value rec, + unsigned index) -> std::tuple { + auto recType = mlir::dyn_cast( + fir::unwrapPassByRefType(rec.getType())); + auto [fieldName, fieldTy] = recType.getTypeList()[index]; + mlir::Value field = firOpBuilder.create( + loc, fir::FieldType::get(recType.getContext()), fieldName, recType, + fir::getTypeParams(rec)); + return {firOpBuilder.create( + loc, firOpBuilder.getRefType(fieldTy), rec, field), + fieldTy}; + }; + + mlir::omp::DeclareMapperInfoOperands clauseOps; + llvm::SmallVector> memberPlacementIndices; + llvm::SmallVector memberMapOps; + + llvm::omp::OpenMPOffloadMappingFlags mapFlag = + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO | + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM | + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; + mlir::omp::VariableCaptureKind captureKind = + mlir::omp::VariableCaptureKind::ByRef; + int64_t index = 0; ---------------- skatrak wrote: Nit: Use `llvm::enumerate` instead of an outside index variable. That way we avoid letting it go out-of-sync in the future if we introduce any `continue` statements inside of the loop body. 
https://github.com/llvm/llvm-project/pull/140562 From flang-commits at lists.llvm.org Tue Jul 1 08:01:36 2025 From: flang-commits at lists.llvm.org (Sergio Afonso via flang-commits) Date: Tue, 01 Jul 2025 08:01:36 -0700 (PDT) Subject: [flang-commits] [flang] [OpenMP][Flang] Emit default declare mappers implicitly for derived types (PR #140562) In-Reply-To: Message-ID: <6863f850.050a0220.23a5a8.8d08@mx.google.com> ================ @@ -1,9 +1,11 @@ !RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s +!CHECK: omp.declare_mapper @[[MAPPER1:_QQFmaptype_derived_implicit_allocatablescalar_and_array.omp.default.mapper]] : !fir.type<_QFmaptype_derived_implicit_allocatableTscalar_and_array{real:f32,array:!fir.array<10xi32>,int:i32}> { ---------------- skatrak wrote: ```suggestion !CHECK: omp.declare_mapper @[[MAPPER1:_QQFmaptype_derived_implicit_allocatablescalar_and_array\.omp\.default\.mapper]] : !fir.type<_QFmaptype_derived_implicit_allocatableTscalar_and_array{real:f32,array:!fir.array<10xi32>,int:i32}> { ``` https://github.com/llvm/llvm-project/pull/140562 From flang-commits at lists.llvm.org Tue Jul 1 08:01:36 2025 From: flang-commits at lists.llvm.org (Sergio Afonso via flang-commits) Date: Tue, 01 Jul 2025 08:01:36 -0700 (PDT) Subject: [flang-commits] [flang] [OpenMP][Flang] Emit default declare mappers implicitly for derived types (PR #140562) In-Reply-To: Message-ID: <6863f850.170a0220.2e7d61.fd4e@mx.google.com> ================ @@ -2348,6 +2348,122 @@ genSingleOp(lower::AbstractConverter &converter, lower::SymMap &symTable, queue, item, clauseOps); } +static mlir::FlatSymbolRefAttr +genImplicitDefaultDeclareMapper(lower::AbstractConverter &converter, + mlir::Location loc, fir::RecordType recordType, + llvm::StringRef mapperNameStr) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + + // Save current insertion point before moving to the module scope to create + // the DeclareMapperOp + mlir::OpBuilder::InsertionGuard guard(firOpBuilder); 
+ + firOpBuilder.setInsertionPointToStart(converter.getModuleOp().getBody()); + auto declMapperOp = firOpBuilder.create( + loc, mapperNameStr, recordType); + auto &region = declMapperOp.getRegion(); + firOpBuilder.createBlock(&region); + auto mapperArg = region.addArgument(firOpBuilder.getRefType(recordType), loc); + + auto declareOp = + firOpBuilder.create(loc, mapperArg, /*uniq_name=*/""); + + const auto genBoundsOps = [&](mlir::Value mapVal, + llvm::SmallVectorImpl &bounds) { + fir::ExtendedValue extVal = + hlfir::translateToExtendedValue(mapVal.getLoc(), firOpBuilder, + hlfir::Entity{mapVal}, + /*contiguousHint=*/true) + .first; + fir::factory::AddrAndBoundsInfo info = fir::factory::getDataOperandBaseAddr( + firOpBuilder, mapVal, /*isOptional=*/false, mapVal.getLoc()); + bounds = fir::factory::genImplicitBoundsOps( + firOpBuilder, info, extVal, + /*dataExvIsAssumedSize=*/false, mapVal.getLoc()); + }; + + // Return a reference to the contents of a derived type with one field. + // Also return the field type. 
+ const auto getFieldRef = + [&](mlir::Value rec, + unsigned index) -> std::tuple { + auto recType = mlir::dyn_cast( + fir::unwrapPassByRefType(rec.getType())); + auto [fieldName, fieldTy] = recType.getTypeList()[index]; + mlir::Value field = firOpBuilder.create( + loc, fir::FieldType::get(recType.getContext()), fieldName, recType, + fir::getTypeParams(rec)); + return {firOpBuilder.create( + loc, firOpBuilder.getRefType(fieldTy), rec, field), + fieldTy}; + }; + + mlir::omp::DeclareMapperInfoOperands clauseOps; + llvm::SmallVector> memberPlacementIndices; + llvm::SmallVector memberMapOps; + + llvm::omp::OpenMPOffloadMappingFlags mapFlag = + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO | + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM | + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; + mlir::omp::VariableCaptureKind captureKind = + mlir::omp::VariableCaptureKind::ByRef; + int64_t index = 0; + + // Populate the declareMapper region with the map information. + for (const auto &[memberName, memberType] : + mlir::dyn_cast(recordType).getTypeList()) { + auto [ref, type] = getFieldRef(declareOp.getBase(), index); + mlir::FlatSymbolRefAttr mapperId; + if (auto recType = mlir::dyn_cast(memberType)) { + std::string mapperIdName = + recType.getName().str() + ".omp.default.mapper"; + if (auto *sym = converter.getCurrentScope().FindSymbol(mapperIdName)) + mapperIdName = converter.mangleName(mapperIdName, sym->owner()); + else if (auto *sym = converter.getCurrentScope().FindSymbol(memberName)) + mapperIdName = converter.mangleName(mapperIdName, sym->owner()); + + if (converter.getModuleOp().lookupSymbol(mapperIdName)) + mapperId = mlir::FlatSymbolRefAttr::get(&converter.getMLIRContext(), + mapperIdName); + else + mapperId = genImplicitDefaultDeclareMapper(converter, loc, recType, + mapperIdName); + } + + llvm::SmallVector bounds; + genBoundsOps(ref, bounds); + mlir::Value mapOp = createMapInfoOp( + firOpBuilder, loc, ref, /*varPtrPtr=*/mlir::Value{}, "", 
bounds, ---------------- skatrak wrote: Nit: Could the symbol name be passed to this function, so that the name here and below could be populated? I would imagine it might be populated as `symName + "%" + memberName` here (which could also be the `symName` passed to the recursive self call above) and just `symName` below. https://github.com/llvm/llvm-project/pull/140562 From flang-commits at lists.llvm.org Tue Jul 1 08:01:36 2025 From: flang-commits at lists.llvm.org (Sergio Afonso via flang-commits) Date: Tue, 01 Jul 2025 08:01:36 -0700 (PDT) Subject: [flang-commits] [flang] [OpenMP][Flang] Emit default declare mappers implicitly for derived types (PR #140562) In-Reply-To: Message-ID: <6863f850.630a0220.1ddefc.fc3c@mx.google.com> ================ @@ -149,7 +150,7 @@ subroutine declare_mapper_4 integer :: num end type - !CHECK: omp.declare_mapper @[[MY_TYPE_MAPPER:_QQFdeclare_mapper_4my_type.default]] : [[MY_TYPE:!fir\.type<_QFdeclare_mapper_4Tmy_type\{num:i32\}>]] + !CHECK: omp.declare_mapper @[[MY_TYPE_MAPPER:_QQFdeclare_mapper_4my_type.omp.default.mapper]] : [[MY_TYPE:!fir\.type<_QFdeclare_mapper_4Tmy_type\{num:i32\}>]] ---------------- skatrak wrote: ```suggestion !CHECK: omp.declare_mapper @[[MY_TYPE_MAPPER:_QQFdeclare_mapper_4my_type\.omp\.default\.mapper]] : [[MY_TYPE:!fir\.type<_QFdeclare_mapper_4Tmy_type\{num:i32\}>]] ``` https://github.com/llvm/llvm-project/pull/140562 From flang-commits at lists.llvm.org Tue Jul 1 08:01:40 2025 From: flang-commits at lists.llvm.org (Sergio Afonso via flang-commits) Date: Tue, 01 Jul 2025 08:01:40 -0700 (PDT) Subject: [flang-commits] [flang] [OpenMP][Flang] Emit default declare mappers implicitly for derived types (PR #140562) In-Reply-To: Message-ID: <6863f854.170a0220.2caeed.ea6e@mx.google.com> ================ @@ -2348,6 +2348,122 @@ genSingleOp(lower::AbstractConverter &converter, lower::SymMap &symTable, queue, item, clauseOps); } +static mlir::FlatSymbolRefAttr 
+genImplicitDefaultDeclareMapper(lower::AbstractConverter &converter, ---------------- skatrak wrote: Is this exclusively needed for implicitly mapped variables into `target` regions or should it be called for explicit `map` clauses for derived types when they don't specify any mapper? In particular, in `target`, `declare mapper`, `target enter data` and `target exit data` directives. Would we need this for any other map-like clause, like `use_device_ptr`, `use_device_addr` or `has_device_addr`? https://github.com/llvm/llvm-project/pull/140562 From flang-commits at lists.llvm.org Tue Jul 1 08:01:40 2025 From: flang-commits at lists.llvm.org (Sergio Afonso via flang-commits) Date: Tue, 01 Jul 2025 08:01:40 -0700 (PDT) Subject: [flang-commits] [flang] [OpenMP][Flang] Emit default declare mappers implicitly for derived types (PR #140562) In-Reply-To: Message-ID: <6863f854.170a0220.1357ad.0a39@mx.google.com> ================ @@ -2348,6 +2348,122 @@ genSingleOp(lower::AbstractConverter &converter, lower::SymMap &symTable, queue, item, clauseOps); } +static mlir::FlatSymbolRefAttr +genImplicitDefaultDeclareMapper(lower::AbstractConverter &converter, + mlir::Location loc, fir::RecordType recordType, + llvm::StringRef mapperNameStr) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + + // Save current insertion point before moving to the module scope to create + // the DeclareMapperOp + mlir::OpBuilder::InsertionGuard guard(firOpBuilder); + + firOpBuilder.setInsertionPointToStart(converter.getModuleOp().getBody()); + auto declMapperOp = firOpBuilder.create( + loc, mapperNameStr, recordType); + auto &region = declMapperOp.getRegion(); + firOpBuilder.createBlock(&region); + auto mapperArg = region.addArgument(firOpBuilder.getRefType(recordType), loc); + + auto declareOp = + firOpBuilder.create(loc, mapperArg, /*uniq_name=*/""); + + const auto genBoundsOps = [&](mlir::Value mapVal, + llvm::SmallVectorImpl &bounds) { + fir::ExtendedValue extVal = + 
hlfir::translateToExtendedValue(mapVal.getLoc(), firOpBuilder, + hlfir::Entity{mapVal}, + /*contiguousHint=*/true) + .first; + fir::factory::AddrAndBoundsInfo info = fir::factory::getDataOperandBaseAddr( + firOpBuilder, mapVal, /*isOptional=*/false, mapVal.getLoc()); + bounds = fir::factory::genImplicitBoundsOps( + firOpBuilder, info, extVal, + /*dataExvIsAssumedSize=*/false, mapVal.getLoc()); + }; + + // Return a reference to the contents of a derived type with one field. + // Also return the field type. + const auto getFieldRef = + [&](mlir::Value rec, + unsigned index) -> std::tuple { + auto recType = mlir::dyn_cast( + fir::unwrapPassByRefType(rec.getType())); + auto [fieldName, fieldTy] = recType.getTypeList()[index]; + mlir::Value field = firOpBuilder.create( + loc, fir::FieldType::get(recType.getContext()), fieldName, recType, + fir::getTypeParams(rec)); + return {firOpBuilder.create( + loc, firOpBuilder.getRefType(fieldTy), rec, field), + fieldTy}; + }; + + mlir::omp::DeclareMapperInfoOperands clauseOps; + llvm::SmallVector> memberPlacementIndices; + llvm::SmallVector memberMapOps; + + llvm::omp::OpenMPOffloadMappingFlags mapFlag = + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO | + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM | + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; + mlir::omp::VariableCaptureKind captureKind = + mlir::omp::VariableCaptureKind::ByRef; + int64_t index = 0; + + // Populate the declareMapper region with the map information. 
+ for (const auto &[memberName, memberType] : + mlir::dyn_cast(recordType).getTypeList()) { + auto [ref, type] = getFieldRef(declareOp.getBase(), index); + mlir::FlatSymbolRefAttr mapperId; + if (auto recType = mlir::dyn_cast(memberType)) { + std::string mapperIdName = + recType.getName().str() + ".omp.default.mapper"; + if (auto *sym = converter.getCurrentScope().FindSymbol(mapperIdName)) + mapperIdName = converter.mangleName(mapperIdName, sym->owner()); + else if (auto *sym = converter.getCurrentScope().FindSymbol(memberName)) + mapperIdName = converter.mangleName(mapperIdName, sym->owner()); + + if (converter.getModuleOp().lookupSymbol(mapperIdName)) + mapperId = mlir::FlatSymbolRefAttr::get(&converter.getMLIRContext(), + mapperIdName); + else + mapperId = genImplicitDefaultDeclareMapper(converter, loc, recType, + mapperIdName); ---------------- skatrak wrote: Nit: Since both calls to the new function are wrapped inside of the same logic, perhaps `genImplicitDefaultDeclareMapper` could be `getOrGenImplicitDefaultDeclareMapper` and do the lookup inside of the function. That way we ensure we won't create duplicated `DeclareMapperOp`s, if we decide to call this function from other spots in the future. 
https://github.com/llvm/llvm-project/pull/140562 From flang-commits at lists.llvm.org Tue Jul 1 08:01:43 2025 From: flang-commits at lists.llvm.org (Sergio Afonso via flang-commits) Date: Tue, 01 Jul 2025 08:01:43 -0700 (PDT) Subject: [flang-commits] [flang] [OpenMP][Flang] Emit default declare mappers implicitly for derived types (PR #140562) In-Reply-To: Message-ID: <6863f857.170a0220.325eb1.7f86@mx.google.com> ================ @@ -171,3 +172,93 @@ subroutine declare_mapper_4 a%num = 40 !$omp end target end subroutine declare_mapper_4 + +!--- omp-declare-mapper-5.f90 +program declare_mapper_5 + implicit none + + type :: mytype + integer :: x, y + end type + + !CHECK: omp.declare_mapper @[[INNER_MAPPER_NAMED:_QQFFuse_innermy_mapper]] : [[MY_TYPE:!fir\.type<_QFTmytype\{x:i32,y:i32\}>]] + !CHECK: omp.declare_mapper @[[INNER_MAPPER_DEFAULT:_QQFFuse_innermytype.omp.default.mapper]] : [[MY_TYPE]] + !CHECK: omp.declare_mapper @[[OUTER_MAPPER_NAMED:_QQFmy_mapper]] : [[MY_TYPE]] + !CHECK: omp.declare_mapper @[[OUTER_MAPPER_DEFAULT:_QQFmytype.omp.default.mapper]] : [[MY_TYPE]] ---------------- skatrak wrote: ```suggestion !CHECK: omp.declare_mapper @[[OUTER_MAPPER_DEFAULT:_QQFmytype\.omp\.default\.mapper]] : [[MY_TYPE]] ``` https://github.com/llvm/llvm-project/pull/140562 From flang-commits at lists.llvm.org Tue Jul 1 08:01:37 2025 From: flang-commits at lists.llvm.org (Sergio Afonso via flang-commits) Date: Tue, 01 Jul 2025 08:01:37 -0700 (PDT) Subject: [flang-commits] [flang] [OpenMP][Flang] Emit default declare mappers implicitly for derived types (PR #140562) In-Reply-To: Message-ID: <6863f851.050a0220.acf46.8176@mx.google.com> ================ @@ -2348,6 +2348,122 @@ genSingleOp(lower::AbstractConverter &converter, lower::SymMap &symTable, queue, item, clauseOps); } +static mlir::FlatSymbolRefAttr +genImplicitDefaultDeclareMapper(lower::AbstractConverter &converter, + mlir::Location loc, fir::RecordType recordType, + llvm::StringRef mapperNameStr) { + 
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + + // Save current insertion point before moving to the module scope to create + // the DeclareMapperOp + mlir::OpBuilder::InsertionGuard guard(firOpBuilder); + + firOpBuilder.setInsertionPointToStart(converter.getModuleOp().getBody()); + auto declMapperOp = firOpBuilder.create( + loc, mapperNameStr, recordType); + auto &region = declMapperOp.getRegion(); + firOpBuilder.createBlock(&region); + auto mapperArg = region.addArgument(firOpBuilder.getRefType(recordType), loc); + + auto declareOp = + firOpBuilder.create(loc, mapperArg, /*uniq_name=*/""); + + const auto genBoundsOps = [&](mlir::Value mapVal, + llvm::SmallVectorImpl &bounds) { + fir::ExtendedValue extVal = + hlfir::translateToExtendedValue(mapVal.getLoc(), firOpBuilder, + hlfir::Entity{mapVal}, + /*contiguousHint=*/true) + .first; + fir::factory::AddrAndBoundsInfo info = fir::factory::getDataOperandBaseAddr( + firOpBuilder, mapVal, /*isOptional=*/false, mapVal.getLoc()); + bounds = fir::factory::genImplicitBoundsOps( + firOpBuilder, info, extVal, + /*dataExvIsAssumedSize=*/false, mapVal.getLoc()); + }; + + // Return a reference to the contents of a derived type with one field. + // Also return the field type. + const auto getFieldRef = + [&](mlir::Value rec, + unsigned index) -> std::tuple { + auto recType = mlir::dyn_cast( ---------------- skatrak wrote: It's assumed that `recType != nullptr` below, so either `assert` this is the case by using `cast` or we need to handle that situation. 
```suggestion auto recType = mlir::cast( ``` https://github.com/llvm/llvm-project/pull/140562 From flang-commits at lists.llvm.org Tue Jul 1 08:02:41 2025 From: flang-commits at lists.llvm.org (Krzysztof Parzyszek via flang-commits) Date: Tue, 01 Jul 2025 08:02:41 -0700 (PDT) Subject: [flang-commits] [flang] [flang][OpenMP] Split check-omp-structure.cpp into smaller files, NFC (PR #146359) In-Reply-To: Message-ID: <6863f891.170a0220.1ecc5b.00de@mx.google.com> kparzysz wrote: I've done `make -C b/x86/tools/flang/lib/Semantics/ clean` followed by `time make -C b/x86/tools/flang/lib/Semantics/ all -j8`. This PR: 4m55s main: 4m51s https://github.com/llvm/llvm-project/pull/146359 From flang-commits at lists.llvm.org Tue Jul 1 04:27:30 2025 From: flang-commits at lists.llvm.org (Sergio Afonso via flang-commits) Date: Tue, 01 Jul 2025 04:27:30 -0700 (PDT) Subject: [flang-commits] [clang] [flang] [mlir] [flang][flang-driver][mlir][OpenMP] atomic control support (PR #143441) In-Reply-To: Message-ID: <6863c622.630a0220.2e2b82.8e94@mx.google.com> https://github.com/skatrak edited https://github.com/llvm/llvm-project/pull/143441 From flang-commits at lists.llvm.org Tue Jul 1 04:27:31 2025 From: flang-commits at lists.llvm.org (Sergio Afonso via flang-commits) Date: Tue, 01 Jul 2025 04:27:31 -0700 (PDT) Subject: [flang-commits] [clang] [flang] [mlir] [flang][flang-driver][mlir][OpenMP] atomic control support (PR #143441) In-Reply-To: Message-ID: <6863c623.170a0220.22c045.07cf@mx.google.com> ================ @@ -53,6 +53,11 @@ class TargetOptions { /// Print verbose assembly bool asmVerbose = false; + + /// Atomic control options for AMD gpu ---------------- skatrak wrote: ```suggestion /// Atomic control options ``` https://github.com/llvm/llvm-project/pull/143441 From flang-commits at lists.llvm.org Tue Jul 1 04:27:31 2025 From: flang-commits at lists.llvm.org (Sergio Afonso via flang-commits) Date: Tue, 01 Jul 2025 04:27:31 -0700 (PDT) Subject: [flang-commits] [clang] [flang] 
[mlir] [flang][flang-driver][mlir][OpenMP] atomic control support (PR #143441) In-Reply-To: Message-ID: <6863c623.170a0220.f5d04.94b3@mx.google.com> https://github.com/skatrak commented: Thank you Anchu, I only have some small suggestions and a couple of questions. https://github.com/llvm/llvm-project/pull/143441 From flang-commits at lists.llvm.org Tue Jul 1 04:27:31 2025 From: flang-commits at lists.llvm.org (Sergio Afonso via flang-commits) Date: Tue, 01 Jul 2025 04:27:31 -0700 (PDT) Subject: [flang-commits] [clang] [flang] [mlir] [flang][flang-driver][mlir][OpenMP] atomic control support (PR #143441) In-Reply-To: Message-ID: <6863c623.170a0220.e1fcc.9845@mx.google.com> ================ @@ -58,6 +58,14 @@ void setTargetCPU(mlir::ModuleOp mod, llvm::StringRef cpu); /// Get the target CPU string from the Module or return a null reference. llvm::StringRef getTargetCPU(mlir::ModuleOp mod); +// Setters and getters for atomic control options. ---------------- skatrak wrote: Nit: All other getters/setters here are documented individually using doxygen-formatted comments. I'd suggest doing the same, even if a chunk of the description is going to repeat across multiple functions. 
https://github.com/llvm/llvm-project/pull/143441 From flang-commits at lists.llvm.org Tue Jul 1 04:27:31 2025 From: flang-commits at lists.llvm.org (Sergio Afonso via flang-commits) Date: Tue, 01 Jul 2025 04:27:31 -0700 (PDT) Subject: [flang-commits] [clang] [flang] [mlir] [flang][flang-driver][mlir][OpenMP] atomic control support (PR #143441) In-Reply-To: Message-ID: <6863c623.170a0220.39f238.1470@mx.google.com> ================ @@ -53,6 +53,11 @@ class TargetOptions { /// Print verbose assembly bool asmVerbose = false; + + /// Atomic control options for AMD gpu + bool ignoreDenormalMode = false; + bool remoteMemory = false; + bool fineGrainedMemory = false; ---------------- skatrak wrote: Nit: I think it makes sense to include "atomic" in the variable names, as well as in the associated getters/setters and attributes, to avoid confusion as to what these flags apply to. ```suggestion bool atomicIgnoreDenormalMode = false; bool atomicRemoteMemory = false; bool atomicFineGrainedMemory = false; ``` https://github.com/llvm/llvm-project/pull/143441 From flang-commits at lists.llvm.org Tue Jul 1 04:27:31 2025 From: flang-commits at lists.llvm.org (Sergio Afonso via flang-commits) Date: Tue, 01 Jul 2025 04:27:31 -0700 (PDT) Subject: [flang-commits] [clang] [flang] [mlir] [flang][flang-driver][mlir][OpenMP] atomic control support (PR #143441) In-Reply-To: Message-ID: <6863c623.170a0220.38aae.9152@mx.google.com> ================ @@ -54,6 +54,21 @@ def FlagsAttr : OpenMP_Attr<"Flags", "flags"> { let assemblyFormat = "`<` struct(params) `>`"; } +//===----------------------------------------------------------------------===// +// AtomicControlAttr +//===----------------------------------------------------------------------===// + +// Atomic control attributes hold information about architectural +// characteristics which are required for lowering atomic operations. 
+def AtomicControlAttr : OpenMP_Attr<"AtomicControl", "atomic_control"> { + let parameters = + (ins DefaultValuedParameter<"bool", "false">:$ignore_denormal_mode, + DefaultValuedParameter<"bool", "false">:$fine_grained_memory, + DefaultValuedParameter<"bool", "false">:$remote_memory); + + let assemblyFormat = "`<` struct(params) `>`"; +} + ---------------- skatrak wrote: Nit: This definition should go before `DeclareTargetAttr`, since this file is alphabetically sorted. https://github.com/llvm/llvm-project/pull/143441 From flang-commits at lists.llvm.org Tue Jul 1 04:27:31 2025 From: flang-commits at lists.llvm.org (Sergio Afonso via flang-commits) Date: Tue, 01 Jul 2025 04:27:31 -0700 (PDT) Subject: [flang-commits] [clang] [flang] [mlir] [flang][flang-driver][mlir][OpenMP] atomic control support (PR #143441) In-Reply-To: Message-ID: <6863c623.170a0220.af388.8a39@mx.google.com> ================ @@ -88,6 +88,36 @@ void fir::setTuneCPU(mlir::ModuleOp mod, llvm::StringRef cpu) { mod->setAttr(tuneCpuName, mlir::StringAttr::get(ctx, cpu)); } +static constexpr const char *ignoreDenormalModeName = + "fir.ignore.denormal.mode"; ---------------- skatrak wrote: Nit: Generally, dots are used to separate "scopes" in some hierarchy. I'd suggest following the `