[flang-commits] [flang] [flang][cuda] Do not produce data transfer in offloaded do concurrent (PR #147435)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Mon Jul 7 21:57:22 PDT 2025
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/147435
>From f6ae5598ac9967e407f74245cc47fff4e35ebe0f Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 7 Jul 2025 17:21:42 -0700
Subject: [PATCH 1/3] [flang][cuda] Do not produce data transfer in offloaded
do concurrent
---
.../include/flang/Optimizer/Builder/CUFCommon.h | 2 +-
flang/include/flang/Support/Fortran-features.h | 2 +-
flang/lib/Lower/Bridge.cpp | 4 +++-
flang/lib/Optimizer/Builder/CUFCommon.cpp | 4 +++-
flang/lib/Semantics/assignment.cpp | 14 ++++++++++++++
flang/lib/Semantics/assignment.h | 3 +++
flang/lib/Semantics/semantics.cpp | 11 ++++++-----
flang/test/Lower/CUDA/cuda-data-transfer.cuf | 16 ++++++++++++++++
flang/tools/bbc/bbc.cpp | 8 ++++++++
9 files changed, 55 insertions(+), 9 deletions(-)
diff --git a/flang/include/flang/Optimizer/Builder/CUFCommon.h b/flang/include/flang/Optimizer/Builder/CUFCommon.h
index 65b9cce1d2021..cfd8a2305d031 100644
--- a/flang/include/flang/Optimizer/Builder/CUFCommon.h
+++ b/flang/include/flang/Optimizer/Builder/CUFCommon.h
@@ -27,7 +27,7 @@ mlir::gpu::GPUModuleOp getOrCreateGPUModule(mlir::ModuleOp mod,
mlir::SymbolTable &symTab);
bool isCUDADeviceContext(mlir::Operation *op);
-bool isCUDADeviceContext(mlir::Region &);
+bool isCUDADeviceContext(mlir::Region &, bool isStdParEnabled = false);
bool isRegisteredDeviceGlobal(fir::GlobalOp op);
bool isRegisteredDeviceAttr(std::optional<cuf::DataAttribute> attr);
diff --git a/flang/include/flang/Support/Fortran-features.h b/flang/include/flang/Support/Fortran-features.h
index 8a2b7b29a5233..393ee7d0687c6 100644
--- a/flang/include/flang/Support/Fortran-features.h
+++ b/flang/include/flang/Support/Fortran-features.h
@@ -55,7 +55,7 @@ ENUM_CLASS(LanguageFeature, BackslashEscapes, OldDebugLines,
SavedLocalInSpecExpr, PrintNamelist, AssumedRankPassedToNonAssumedRank,
IgnoreIrrelevantAttributes, Unsigned, AmbiguousStructureConstructor,
ContiguousOkForSeqAssociation, ForwardRefExplicitTypeDummy,
- InaccessibleDeferredOverride, CudaWarpMatchFunction)
+ InaccessibleDeferredOverride, CudaWarpMatchFunction, StdPar)
// Portability and suspicious usage warnings
ENUM_CLASS(UsageWarning, Portability, PointerToUndefinable,
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index ff35840a6668c..1be64ea26d7ea 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -4886,7 +4886,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
mlir::Location loc = getCurrentLocation();
fir::FirOpBuilder &builder = getFirOpBuilder();
- bool isInDeviceContext = cuf::isCUDADeviceContext(builder.getRegion());
+ bool isInDeviceContext = cuf::isCUDADeviceContext(
+ builder.getRegion(), getFoldingContext().languageFeatures().IsEnabled(
+ Fortran::common::LanguageFeature::StdPar));
bool isCUDATransfer =
IsCUDADataTransfer(assign.lhs, assign.rhs) && !isInDeviceContext;
diff --git a/flang/lib/Optimizer/Builder/CUFCommon.cpp b/flang/lib/Optimizer/Builder/CUFCommon.cpp
index 5f286c04a7ca0..d40ae70503350 100644
--- a/flang/lib/Optimizer/Builder/CUFCommon.cpp
+++ b/flang/lib/Optimizer/Builder/CUFCommon.cpp
@@ -43,7 +43,7 @@ bool cuf::isCUDADeviceContext(mlir::Operation *op) {
// for it.
// If the insertion point is inside an OpenACC region op, it is considered
// device context.
-bool cuf::isCUDADeviceContext(mlir::Region &region) {
+bool cuf::isCUDADeviceContext(mlir::Region &region, bool isStdParEnabled) {
if (region.getParentOfType<cuf::KernelOp>())
return true;
if (region.getParentOfType<mlir::acc::ComputeRegionOpInterface>())
@@ -56,6 +56,8 @@ bool cuf::isCUDADeviceContext(mlir::Region &region) {
cudaProcAttr.getValue() != cuf::ProcAttribute::HostDevice;
}
}
+ if (isStdParEnabled && region.getParentOfType<fir::DoConcurrentLoopOp>())
+ return true;
return false;
}
diff --git a/flang/lib/Semantics/assignment.cpp b/flang/lib/Semantics/assignment.cpp
index 43e23a9d8f60b..8bd123ba1e806 100644
--- a/flang/lib/Semantics/assignment.cpp
+++ b/flang/lib/Semantics/assignment.cpp
@@ -260,6 +260,20 @@ void AssignmentChecker::Enter(const parser::CUFKernelDoConstruct &x) {
void AssignmentChecker::Leave(const parser::CUFKernelDoConstruct &) {
--context_.value().deviceConstructDepth_;
}
+void AssignmentChecker::Enter(const parser::DoConstruct &x) {
+ if (x.IsDoConcurrent() &&
+ context().foldingContext().languageFeatures().IsEnabled(
+ common::LanguageFeature::StdPar)) {
+ ++context_.value().deviceConstructDepth_;
+ }
+}
+void AssignmentChecker::Leave(const parser::DoConstruct &x) {
+ if (x.IsDoConcurrent() &&
+ context().foldingContext().languageFeatures().IsEnabled(
+ common::LanguageFeature::StdPar)) {
+ --context_.value().deviceConstructDepth_;
+ }
+}
static bool IsOpenACCComputeConstruct(const parser::OpenACCBlockConstruct &x) {
const auto &beginBlockDirective =
std::get<Fortran::parser::AccBeginBlockDirective>(x.t);
diff --git a/flang/lib/Semantics/assignment.h b/flang/lib/Semantics/assignment.h
index 4a1bb92037119..fb703c71c989a 100644
--- a/flang/lib/Semantics/assignment.h
+++ b/flang/lib/Semantics/assignment.h
@@ -21,6 +21,7 @@ struct MaskedElsewhereStmt;
struct PointerAssignmentStmt;
struct WhereConstructStmt;
struct WhereStmt;
+struct DoConstruct;
} // namespace Fortran::parser
namespace Fortran::semantics {
@@ -54,6 +55,8 @@ class AssignmentChecker : public virtual BaseChecker {
void Leave(const parser::OpenACCCombinedConstruct &);
void Enter(const parser::OpenACCLoopConstruct &);
void Leave(const parser::OpenACCLoopConstruct &);
+ void Enter(const parser::DoConstruct &);
+ void Leave(const parser::DoConstruct &);
SemanticsContext &context();
diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp
index ed41c6fb16892..f6e6272cbcd7b 100644
--- a/flang/lib/Semantics/semantics.cpp
+++ b/flang/lib/Semantics/semantics.cpp
@@ -197,11 +197,11 @@ static void WarnUndefinedFunctionResult(
using StatementSemanticsPass1 = ExprChecker;
using StatementSemanticsPass2 = SemanticsVisitor<AllocateChecker,
- ArithmeticIfStmtChecker, AssignmentChecker, CaseChecker, CoarrayChecker,
- DataChecker, DeallocateChecker, DoForallChecker, IfStmtChecker, IoChecker,
- MiscChecker, NamelistChecker, NullifyChecker, PurityChecker,
- ReturnStmtChecker, SelectRankConstructChecker, SelectTypeChecker,
- StopChecker>;
+ ArithmeticIfStmtChecker, CaseChecker, CoarrayChecker, DataChecker,
+ DeallocateChecker, DoForallChecker, IfStmtChecker, IoChecker, MiscChecker,
+ NamelistChecker, NullifyChecker, PurityChecker, ReturnStmtChecker,
+ SelectRankConstructChecker, SelectTypeChecker, StopChecker>;
+using StatementSemanticsPass3 = SemanticsVisitor<AssignmentChecker>;
static bool PerformStatementSemantics(
SemanticsContext &context, parser::Program &program) {
@@ -212,6 +212,7 @@ static bool PerformStatementSemantics(
StatementSemanticsPass1{context}.Walk(program);
StatementSemanticsPass2 pass2{context};
pass2.Walk(program);
+ StatementSemanticsPass3{context}.Walk(program);
if (context.languageFeatures().IsEnabled(common::LanguageFeature::OpenACC)) {
SemanticsVisitor<AccStructureChecker>{context}.Walk(program);
}
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index d0032af415604..68a0202f951fe 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -403,3 +403,19 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub20()
! CHECK-NOT: cuf.data_transfer
! CHECK: hlfir.assign
+
+subroutine sub21()
+ real, allocatable,device:: a(:,:), b(:,:)
+ real:: s
+ integer:: i,j,N=16
+ allocate(a(N,N),b(N,N))
+ do concurrent(i=1:N, j=1:N) reduce(+:s)
+ b(i,j)=a(i,j)**2
+ s=s+b(i,j)
+ end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub21()
+! CHECK: fir.do_concurrent.loop
+! CHECK-NOT: cuf.data_transfer
+! CHECK: hlfir.assign
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index 59372a8eb58ed..25d072c93f22c 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -223,6 +223,10 @@ static llvm::cl::opt<bool> enableCUDA("fcuda",
llvm::cl::desc("enable CUDA Fortran"),
llvm::cl::init(false));
+static llvm::cl::opt<bool> enableStdPar("stdpar",
+ llvm::cl::desc("enable stdpar"),
+ llvm::cl::init(false));
+
static llvm::cl::opt<bool>
disableCUDAWarpFunction("fcuda-disable-warp-function",
llvm::cl::desc("Disable CUDA Warp Function"),
@@ -608,6 +612,10 @@ int main(int argc, char **argv) {
options.features.Enable(Fortran::common::LanguageFeature::CUDA);
}
+ if (enableStdPar) {
+ options.features.Enable(Fortran::common::LanguageFeature::StdPar);
+ }
+
if (disableCUDAWarpFunction) {
options.features.Enable(
Fortran::common::LanguageFeature::CudaWarpMatchFunction, false);
>From c4844a45b3327af57574a338c280f965b3c4f49f Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 7 Jul 2025 21:35:13 -0700
Subject: [PATCH 2/3] Move check to CUDAChecker
---
flang/lib/Semantics/assignment.cpp | 70 ------------------------------
flang/lib/Semantics/assignment.h | 10 -----
flang/lib/Semantics/check-cuda.cpp | 64 +++++++++++++++++++++++++--
flang/lib/Semantics/check-cuda.h | 10 ++++-
flang/lib/Semantics/semantics.cpp | 11 +++--
5 files changed, 75 insertions(+), 90 deletions(-)
diff --git a/flang/lib/Semantics/assignment.cpp b/flang/lib/Semantics/assignment.cpp
index 8bd123ba1e806..88e08887160d9 100644
--- a/flang/lib/Semantics/assignment.cpp
+++ b/flang/lib/Semantics/assignment.cpp
@@ -42,7 +42,6 @@ class AssignmentContext {
void Analyze(const parser::AssignmentStmt &);
void Analyze(const parser::PointerAssignmentStmt &);
void Analyze(const parser::ConcurrentControl &);
- int deviceConstructDepth_{0};
SemanticsContext &context() { return context_; }
private:
@@ -97,21 +96,6 @@ void AssignmentContext::Analyze(const parser::AssignmentStmt &stmt) {
if (whereDepth_ > 0) {
CheckShape(lhsLoc, &lhs);
}
- if (context_.foldingContext().languageFeatures().IsEnabled(
- common::LanguageFeature::CUDA)) {
- const auto &scope{context_.FindScope(lhsLoc)};
- const Scope &progUnit{GetProgramUnitContaining(scope)};
- if (!IsCUDADeviceContext(&progUnit) && deviceConstructDepth_ == 0) {
- if (Fortran::evaluate::HasCUDADeviceAttrs(lhs) &&
- Fortran::evaluate::HasCUDAImplicitTransfer(rhs)) {
- if (GetNbOfCUDAManagedOrUnifiedSymbols(lhs) == 1 &&
- GetNbOfCUDAManagedOrUnifiedSymbols(rhs) == 1 &&
- GetNbOfCUDADeviceSymbols(rhs) == 1)
- return; // This is a special case handled on the host.
- context_.Say(lhsLoc, "Unsupported CUDA data transfer"_err_en_US);
- }
- }
- }
}
}
@@ -254,60 +238,6 @@ void AssignmentChecker::Enter(const parser::MaskedElsewhereStmt &x) {
void AssignmentChecker::Leave(const parser::MaskedElsewhereStmt &) {
context_.value().PopWhereContext();
}
-void AssignmentChecker::Enter(const parser::CUFKernelDoConstruct &x) {
- ++context_.value().deviceConstructDepth_;
-}
-void AssignmentChecker::Leave(const parser::CUFKernelDoConstruct &) {
- --context_.value().deviceConstructDepth_;
-}
-void AssignmentChecker::Enter(const parser::DoConstruct &x) {
- if (x.IsDoConcurrent() &&
- context().foldingContext().languageFeatures().IsEnabled(
- common::LanguageFeature::StdPar)) {
- ++context_.value().deviceConstructDepth_;
- }
-}
-void AssignmentChecker::Leave(const parser::DoConstruct &x) {
- if (x.IsDoConcurrent() &&
- context().foldingContext().languageFeatures().IsEnabled(
- common::LanguageFeature::StdPar)) {
- --context_.value().deviceConstructDepth_;
- }
-}
-static bool IsOpenACCComputeConstruct(const parser::OpenACCBlockConstruct &x) {
- const auto &beginBlockDirective =
- std::get<Fortran::parser::AccBeginBlockDirective>(x.t);
- const auto &blockDirective =
- std::get<Fortran::parser::AccBlockDirective>(beginBlockDirective.t);
- if (blockDirective.v == llvm::acc::ACCD_parallel ||
- blockDirective.v == llvm::acc::ACCD_serial ||
- blockDirective.v == llvm::acc::ACCD_kernels) {
- return true;
- }
- return false;
-}
-void AssignmentChecker::Enter(const parser::OpenACCBlockConstruct &x) {
- if (IsOpenACCComputeConstruct(x)) {
- ++context_.value().deviceConstructDepth_;
- }
-}
-void AssignmentChecker::Leave(const parser::OpenACCBlockConstruct &x) {
- if (IsOpenACCComputeConstruct(x)) {
- --context_.value().deviceConstructDepth_;
- }
-}
-void AssignmentChecker::Enter(const parser::OpenACCCombinedConstruct &) {
- ++context_.value().deviceConstructDepth_;
-}
-void AssignmentChecker::Leave(const parser::OpenACCCombinedConstruct &) {
- --context_.value().deviceConstructDepth_;
-}
-void AssignmentChecker::Enter(const parser::OpenACCLoopConstruct &) {
- ++context_.value().deviceConstructDepth_;
-}
-void AssignmentChecker::Leave(const parser::OpenACCLoopConstruct &) {
- --context_.value().deviceConstructDepth_;
-}
} // namespace Fortran::semantics
template class Fortran::common::Indirection<
diff --git a/flang/lib/Semantics/assignment.h b/flang/lib/Semantics/assignment.h
index fb703c71c989a..54b3be2da06e8 100644
--- a/flang/lib/Semantics/assignment.h
+++ b/flang/lib/Semantics/assignment.h
@@ -47,16 +47,6 @@ class AssignmentChecker : public virtual BaseChecker {
void Leave(const parser::EndWhereStmt &);
void Enter(const parser::MaskedElsewhereStmt &);
void Leave(const parser::MaskedElsewhereStmt &);
- void Enter(const parser::CUFKernelDoConstruct &);
- void Leave(const parser::CUFKernelDoConstruct &);
- void Enter(const parser::OpenACCBlockConstruct &);
- void Leave(const parser::OpenACCBlockConstruct &);
- void Enter(const parser::OpenACCCombinedConstruct &);
- void Leave(const parser::OpenACCCombinedConstruct &);
- void Enter(const parser::OpenACCLoopConstruct &);
- void Leave(const parser::OpenACCLoopConstruct &);
- void Enter(const parser::DoConstruct &);
- void Leave(const parser::DoConstruct &);
SemanticsContext &context();
diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp
index 8decfb0149829..c399fdb5a48de 100644
--- a/flang/lib/Semantics/check-cuda.cpp
+++ b/flang/lib/Semantics/check-cuda.cpp
@@ -685,18 +685,67 @@ void CUDAChecker::Enter(const parser::CUFKernelDoConstruct &x) {
std::get<std::list<parser::CUFReduction>>(directive.t)) {
CheckReduce(context_, reduce);
}
- inCUFKernelDoConstruct_ = true;
+ ++deviceConstructDepth_;
+}
+
+static bool IsOpenACCComputeConstruct(const parser::OpenACCBlockConstruct &x) {
+ const auto &beginBlockDirective =
+ std::get<Fortran::parser::AccBeginBlockDirective>(x.t);
+ const auto &blockDirective =
+ std::get<Fortran::parser::AccBlockDirective>(beginBlockDirective.t);
+ if (blockDirective.v == llvm::acc::ACCD_parallel ||
+ blockDirective.v == llvm::acc::ACCD_serial ||
+ blockDirective.v == llvm::acc::ACCD_kernels) {
+ return true;
+ }
+ return false;
}
void CUDAChecker::Leave(const parser::CUFKernelDoConstruct &) {
- inCUFKernelDoConstruct_ = false;
+ --deviceConstructDepth_;
+}
+void CUDAChecker::Enter(const parser::OpenACCBlockConstruct &x) {
+ if (IsOpenACCComputeConstruct(x)) {
+ ++deviceConstructDepth_;
+ }
+}
+void CUDAChecker::Leave(const parser::OpenACCBlockConstruct &x) {
+ if (IsOpenACCComputeConstruct(x)) {
+ --deviceConstructDepth_;
+ }
+}
+void CUDAChecker::Enter(const parser::OpenACCCombinedConstruct &) {
+ ++deviceConstructDepth_;
+}
+void CUDAChecker::Leave(const parser::OpenACCCombinedConstruct &) {
+ --deviceConstructDepth_;
+}
+void CUDAChecker::Enter(const parser::OpenACCLoopConstruct &) {
+ ++deviceConstructDepth_;
+}
+void CUDAChecker::Leave(const parser::OpenACCLoopConstruct &) {
+ --deviceConstructDepth_;
+}
+void CUDAChecker::Enter(const parser::DoConstruct &x) {
+ if (x.IsDoConcurrent() &&
+ context_.foldingContext().languageFeatures().IsEnabled(
+ common::LanguageFeature::StdPar)) {
+ ++deviceConstructDepth_;
+ }
+}
+void CUDAChecker::Leave(const parser::DoConstruct &x) {
+ if (x.IsDoConcurrent() &&
+ context_.foldingContext().languageFeatures().IsEnabled(
+ common::LanguageFeature::StdPar)) {
+ --deviceConstructDepth_;
+ }
}
void CUDAChecker::Enter(const parser::AssignmentStmt &x) {
auto lhsLoc{std::get<parser::Variable>(x.t).GetSource()};
const auto &scope{context_.FindScope(lhsLoc)};
const Scope &progUnit{GetProgramUnitContaining(scope)};
- if (IsCUDADeviceContext(&progUnit) || inCUFKernelDoConstruct_) {
+ if (IsCUDADeviceContext(&progUnit) || deviceConstructDepth_ > 0) {
return; // Data transfer with assignment is only perform on host.
}
@@ -714,6 +763,15 @@ void CUDAChecker::Enter(const parser::AssignmentStmt &x) {
context_.Say(lhsLoc,
"More than one reference to a CUDA object on the right hand side of the assigment"_err_en_US);
}
+
+ if (Fortran::evaluate::HasCUDADeviceAttrs(assign->lhs) &&
+ Fortran::evaluate::HasCUDAImplicitTransfer(assign->rhs)) {
+ if (GetNbOfCUDAManagedOrUnifiedSymbols(assign->lhs) == 1 &&
+ GetNbOfCUDAManagedOrUnifiedSymbols(assign->rhs) == 1 &&
+ GetNbOfCUDADeviceSymbols(assign->rhs) == 1)
+ return; // This is a special case handled on the host.
+ context_.Say(lhsLoc, "Unsupported CUDA data transfer"_err_en_US);
+ }
}
} // namespace Fortran::semantics
diff --git a/flang/lib/Semantics/check-cuda.h b/flang/lib/Semantics/check-cuda.h
index 222a2ee04b57c..10000253ffe5a 100644
--- a/flang/lib/Semantics/check-cuda.h
+++ b/flang/lib/Semantics/check-cuda.h
@@ -41,10 +41,18 @@ class CUDAChecker : public virtual BaseChecker {
void Enter(const parser::CUFKernelDoConstruct &);
void Leave(const parser::CUFKernelDoConstruct &);
void Enter(const parser::AssignmentStmt &);
+ void Enter(const parser::OpenACCBlockConstruct &);
+ void Leave(const parser::OpenACCBlockConstruct &);
+ void Enter(const parser::OpenACCCombinedConstruct &);
+ void Leave(const parser::OpenACCCombinedConstruct &);
+ void Enter(const parser::OpenACCLoopConstruct &);
+ void Leave(const parser::OpenACCLoopConstruct &);
+ void Enter(const parser::DoConstruct &);
+ void Leave(const parser::DoConstruct &);
private:
SemanticsContext &context_;
- bool inCUFKernelDoConstruct_ = false;
+ int deviceConstructDepth_{0};
};
bool CanonicalizeCUDA(parser::Program &);
diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp
index f6e6272cbcd7b..ed41c6fb16892 100644
--- a/flang/lib/Semantics/semantics.cpp
+++ b/flang/lib/Semantics/semantics.cpp
@@ -197,11 +197,11 @@ static void WarnUndefinedFunctionResult(
using StatementSemanticsPass1 = ExprChecker;
using StatementSemanticsPass2 = SemanticsVisitor<AllocateChecker,
- ArithmeticIfStmtChecker, CaseChecker, CoarrayChecker, DataChecker,
- DeallocateChecker, DoForallChecker, IfStmtChecker, IoChecker, MiscChecker,
- NamelistChecker, NullifyChecker, PurityChecker, ReturnStmtChecker,
- SelectRankConstructChecker, SelectTypeChecker, StopChecker>;
-using StatementSemanticsPass3 = SemanticsVisitor<AssignmentChecker>;
+ ArithmeticIfStmtChecker, AssignmentChecker, CaseChecker, CoarrayChecker,
+ DataChecker, DeallocateChecker, DoForallChecker, IfStmtChecker, IoChecker,
+ MiscChecker, NamelistChecker, NullifyChecker, PurityChecker,
+ ReturnStmtChecker, SelectRankConstructChecker, SelectTypeChecker,
+ StopChecker>;
static bool PerformStatementSemantics(
SemanticsContext &context, parser::Program &program) {
@@ -212,7 +212,6 @@ static bool PerformStatementSemantics(
StatementSemanticsPass1{context}.Walk(program);
StatementSemanticsPass2 pass2{context};
pass2.Walk(program);
- StatementSemanticsPass3{context}.Walk(program);
if (context.languageFeatures().IsEnabled(common::LanguageFeature::OpenACC)) {
SemanticsVisitor<AccStructureChecker>{context}.Walk(program);
}
>From bbdd46de5f2a0fd741ac7cd5ef72e27a15208a77 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 7 Jul 2025 21:38:01 -0700
Subject: [PATCH 3/3] Rename language feature
---
flang/include/flang/Optimizer/Builder/CUFCommon.h | 3 ++-
flang/include/flang/Support/Fortran-features.h | 2 +-
flang/lib/Lower/Bridge.cpp | 5 +++--
flang/lib/Optimizer/Builder/CUFCommon.cpp | 6 ++++--
flang/lib/Semantics/check-cuda.cpp | 4 ++--
flang/tools/bbc/bbc.cpp | 12 +++++++-----
6 files changed, 19 insertions(+), 13 deletions(-)
diff --git a/flang/include/flang/Optimizer/Builder/CUFCommon.h b/flang/include/flang/Optimizer/Builder/CUFCommon.h
index cfd8a2305d031..5c56dd6b695f8 100644
--- a/flang/include/flang/Optimizer/Builder/CUFCommon.h
+++ b/flang/include/flang/Optimizer/Builder/CUFCommon.h
@@ -27,7 +27,8 @@ mlir::gpu::GPUModuleOp getOrCreateGPUModule(mlir::ModuleOp mod,
mlir::SymbolTable &symTab);
bool isCUDADeviceContext(mlir::Operation *op);
-bool isCUDADeviceContext(mlir::Region &, bool isStdParEnabled = false);
+bool isCUDADeviceContext(mlir::Region &,
+ bool isDoConcurrentOffloadEnabled = false);
bool isRegisteredDeviceGlobal(fir::GlobalOp op);
bool isRegisteredDeviceAttr(std::optional<cuf::DataAttribute> attr);
diff --git a/flang/include/flang/Support/Fortran-features.h b/flang/include/flang/Support/Fortran-features.h
index 393ee7d0687c6..857de9479e4e5 100644
--- a/flang/include/flang/Support/Fortran-features.h
+++ b/flang/include/flang/Support/Fortran-features.h
@@ -55,7 +55,7 @@ ENUM_CLASS(LanguageFeature, BackslashEscapes, OldDebugLines,
SavedLocalInSpecExpr, PrintNamelist, AssumedRankPassedToNonAssumedRank,
IgnoreIrrelevantAttributes, Unsigned, AmbiguousStructureConstructor,
ContiguousOkForSeqAssociation, ForwardRefExplicitTypeDummy,
- InaccessibleDeferredOverride, CudaWarpMatchFunction, StdPar)
+ InaccessibleDeferredOverride, CudaWarpMatchFunction, DoConcurrentOffload)
// Portability and suspicious usage warnings
ENUM_CLASS(UsageWarning, Portability, PointerToUndefinable,
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 1be64ea26d7ea..696473605a4e0 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -4887,8 +4887,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
fir::FirOpBuilder &builder = getFirOpBuilder();
bool isInDeviceContext = cuf::isCUDADeviceContext(
- builder.getRegion(), getFoldingContext().languageFeatures().IsEnabled(
- Fortran::common::LanguageFeature::StdPar));
+ builder.getRegion(),
+ getFoldingContext().languageFeatures().IsEnabled(
+ Fortran::common::LanguageFeature::DoConcurrentOffload));
bool isCUDATransfer =
IsCUDADataTransfer(assign.lhs, assign.rhs) && !isInDeviceContext;
diff --git a/flang/lib/Optimizer/Builder/CUFCommon.cpp b/flang/lib/Optimizer/Builder/CUFCommon.cpp
index d40ae70503350..dcbf4991907bf 100644
--- a/flang/lib/Optimizer/Builder/CUFCommon.cpp
+++ b/flang/lib/Optimizer/Builder/CUFCommon.cpp
@@ -43,7 +43,8 @@ bool cuf::isCUDADeviceContext(mlir::Operation *op) {
// for it.
// If the insertion point is inside an OpenACC region op, it is considered
// device context.
-bool cuf::isCUDADeviceContext(mlir::Region &region, bool isStdParEnabled) {
+bool cuf::isCUDADeviceContext(mlir::Region &region,
+ bool isDoConcurrentOffloadEnabled) {
if (region.getParentOfType<cuf::KernelOp>())
return true;
if (region.getParentOfType<mlir::acc::ComputeRegionOpInterface>())
@@ -56,7 +57,8 @@ bool cuf::isCUDADeviceContext(mlir::Region &region, bool isStdParEnabled) {
cudaProcAttr.getValue() != cuf::ProcAttribute::HostDevice;
}
}
- if (isStdParEnabled && region.getParentOfType<fir::DoConcurrentLoopOp>())
+ if (isDoConcurrentOffloadEnabled &&
+ region.getParentOfType<fir::DoConcurrentLoopOp>())
return true;
return false;
}
diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp
index c399fdb5a48de..8103402598f5f 100644
--- a/flang/lib/Semantics/check-cuda.cpp
+++ b/flang/lib/Semantics/check-cuda.cpp
@@ -729,14 +729,14 @@ void CUDAChecker::Leave(const parser::OpenACCLoopConstruct &) {
void CUDAChecker::Enter(const parser::DoConstruct &x) {
if (x.IsDoConcurrent() &&
context_.foldingContext().languageFeatures().IsEnabled(
- common::LanguageFeature::StdPar)) {
+ common::LanguageFeature::DoConcurrentOffload)) {
++deviceConstructDepth_;
}
}
void CUDAChecker::Leave(const parser::DoConstruct &x) {
if (x.IsDoConcurrent() &&
context_.foldingContext().languageFeatures().IsEnabled(
- common::LanguageFeature::StdPar)) {
+ common::LanguageFeature::DoConcurrentOffload)) {
--deviceConstructDepth_;
}
}
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index 25d072c93f22c..d216069654bc4 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -223,9 +223,10 @@ static llvm::cl::opt<bool> enableCUDA("fcuda",
llvm::cl::desc("enable CUDA Fortran"),
llvm::cl::init(false));
-static llvm::cl::opt<bool> enableStdPar("stdpar",
- llvm::cl::desc("enable stdpar"),
- llvm::cl::init(false));
+static llvm::cl::opt<bool>
+ enableDoConcurrentOffload("fdoconcurrent-offload",
+ llvm::cl::desc("enable do concurrent offload"),
+ llvm::cl::init(false));
static llvm::cl::opt<bool>
disableCUDAWarpFunction("fcuda-disable-warp-function",
@@ -612,8 +613,9 @@ int main(int argc, char **argv) {
options.features.Enable(Fortran::common::LanguageFeature::CUDA);
}
- if (enableStdPar) {
- options.features.Enable(Fortran::common::LanguageFeature::StdPar);
+ if (enableDoConcurrentOffload) {
+ options.features.Enable(
+ Fortran::common::LanguageFeature::DoConcurrentOffload);
}
if (disableCUDAWarpFunction) {
More information about the flang-commits
mailing list