[flang-commits] [flang] [flang][cuda] Do not produce data transfer in offloaded do concurrent (PR #147435)

Valentin Clement バレンタイン クレメン via flang-commits flang-commits at lists.llvm.org
Mon Jul 7 21:57:22 PDT 2025


https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/147435

>From f6ae5598ac9967e407f74245cc47fff4e35ebe0f Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 7 Jul 2025 17:21:42 -0700
Subject: [PATCH 1/3] [flang][cuda] Do not produce data transfer in offloaded
 do concurrent

---
 .../include/flang/Optimizer/Builder/CUFCommon.h  |  2 +-
 flang/include/flang/Support/Fortran-features.h   |  2 +-
 flang/lib/Lower/Bridge.cpp                       |  4 +++-
 flang/lib/Optimizer/Builder/CUFCommon.cpp        |  4 +++-
 flang/lib/Semantics/assignment.cpp               | 14 ++++++++++++++
 flang/lib/Semantics/assignment.h                 |  3 +++
 flang/lib/Semantics/semantics.cpp                | 11 ++++++-----
 flang/test/Lower/CUDA/cuda-data-transfer.cuf     | 16 ++++++++++++++++
 flang/tools/bbc/bbc.cpp                          |  8 ++++++++
 9 files changed, 55 insertions(+), 9 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/CUFCommon.h b/flang/include/flang/Optimizer/Builder/CUFCommon.h
index 65b9cce1d2021..cfd8a2305d031 100644
--- a/flang/include/flang/Optimizer/Builder/CUFCommon.h
+++ b/flang/include/flang/Optimizer/Builder/CUFCommon.h
@@ -27,7 +27,7 @@ mlir::gpu::GPUModuleOp getOrCreateGPUModule(mlir::ModuleOp mod,
                                             mlir::SymbolTable &symTab);
 
 bool isCUDADeviceContext(mlir::Operation *op);
-bool isCUDADeviceContext(mlir::Region &);
+bool isCUDADeviceContext(mlir::Region &, bool isStdParEnabled = false);
 bool isRegisteredDeviceGlobal(fir::GlobalOp op);
 bool isRegisteredDeviceAttr(std::optional<cuf::DataAttribute> attr);
 
diff --git a/flang/include/flang/Support/Fortran-features.h b/flang/include/flang/Support/Fortran-features.h
index 8a2b7b29a5233..393ee7d0687c6 100644
--- a/flang/include/flang/Support/Fortran-features.h
+++ b/flang/include/flang/Support/Fortran-features.h
@@ -55,7 +55,7 @@ ENUM_CLASS(LanguageFeature, BackslashEscapes, OldDebugLines,
     SavedLocalInSpecExpr, PrintNamelist, AssumedRankPassedToNonAssumedRank,
     IgnoreIrrelevantAttributes, Unsigned, AmbiguousStructureConstructor,
     ContiguousOkForSeqAssociation, ForwardRefExplicitTypeDummy,
-    InaccessibleDeferredOverride, CudaWarpMatchFunction)
+    InaccessibleDeferredOverride, CudaWarpMatchFunction, StdPar)
 
 // Portability and suspicious usage warnings
 ENUM_CLASS(UsageWarning, Portability, PointerToUndefinable,
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index ff35840a6668c..1be64ea26d7ea 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -4886,7 +4886,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     mlir::Location loc = getCurrentLocation();
     fir::FirOpBuilder &builder = getFirOpBuilder();
 
-    bool isInDeviceContext = cuf::isCUDADeviceContext(builder.getRegion());
+    bool isInDeviceContext = cuf::isCUDADeviceContext(
+        builder.getRegion(), getFoldingContext().languageFeatures().IsEnabled(
+                                 Fortran::common::LanguageFeature::StdPar));
 
     bool isCUDATransfer =
         IsCUDADataTransfer(assign.lhs, assign.rhs) && !isInDeviceContext;
diff --git a/flang/lib/Optimizer/Builder/CUFCommon.cpp b/flang/lib/Optimizer/Builder/CUFCommon.cpp
index 5f286c04a7ca0..d40ae70503350 100644
--- a/flang/lib/Optimizer/Builder/CUFCommon.cpp
+++ b/flang/lib/Optimizer/Builder/CUFCommon.cpp
@@ -43,7 +43,7 @@ bool cuf::isCUDADeviceContext(mlir::Operation *op) {
 // for it.
 // If the insertion point is inside an OpenACC region op, it is considered
 // device context.
-bool cuf::isCUDADeviceContext(mlir::Region &region) {
+bool cuf::isCUDADeviceContext(mlir::Region &region, bool isStdParEnabled) {
   if (region.getParentOfType<cuf::KernelOp>())
     return true;
   if (region.getParentOfType<mlir::acc::ComputeRegionOpInterface>())
@@ -56,6 +56,8 @@ bool cuf::isCUDADeviceContext(mlir::Region &region) {
              cudaProcAttr.getValue() != cuf::ProcAttribute::HostDevice;
     }
   }
+  if (isStdParEnabled && region.getParentOfType<fir::DoConcurrentLoopOp>())
+    return true;
   return false;
 }
 
diff --git a/flang/lib/Semantics/assignment.cpp b/flang/lib/Semantics/assignment.cpp
index 43e23a9d8f60b..8bd123ba1e806 100644
--- a/flang/lib/Semantics/assignment.cpp
+++ b/flang/lib/Semantics/assignment.cpp
@@ -260,6 +260,20 @@ void AssignmentChecker::Enter(const parser::CUFKernelDoConstruct &x) {
 void AssignmentChecker::Leave(const parser::CUFKernelDoConstruct &) {
   --context_.value().deviceConstructDepth_;
 }
+void AssignmentChecker::Enter(const parser::DoConstruct &x) {
+  if (x.IsDoConcurrent() &&
+      context().foldingContext().languageFeatures().IsEnabled(
+          common::LanguageFeature::StdPar)) {
+    ++context_.value().deviceConstructDepth_;
+  }
+}
+void AssignmentChecker::Leave(const parser::DoConstruct &x) {
+  if (x.IsDoConcurrent() &&
+      context().foldingContext().languageFeatures().IsEnabled(
+          common::LanguageFeature::StdPar)) {
+    --context_.value().deviceConstructDepth_;
+  }
+}
 static bool IsOpenACCComputeConstruct(const parser::OpenACCBlockConstruct &x) {
   const auto &beginBlockDirective =
       std::get<Fortran::parser::AccBeginBlockDirective>(x.t);
diff --git a/flang/lib/Semantics/assignment.h b/flang/lib/Semantics/assignment.h
index 4a1bb92037119..fb703c71c989a 100644
--- a/flang/lib/Semantics/assignment.h
+++ b/flang/lib/Semantics/assignment.h
@@ -21,6 +21,7 @@ struct MaskedElsewhereStmt;
 struct PointerAssignmentStmt;
 struct WhereConstructStmt;
 struct WhereStmt;
+struct DoConstruct;
 } // namespace Fortran::parser
 
 namespace Fortran::semantics {
@@ -54,6 +55,8 @@ class AssignmentChecker : public virtual BaseChecker {
   void Leave(const parser::OpenACCCombinedConstruct &);
   void Enter(const parser::OpenACCLoopConstruct &);
   void Leave(const parser::OpenACCLoopConstruct &);
+  void Enter(const parser::DoConstruct &);
+  void Leave(const parser::DoConstruct &);
 
   SemanticsContext &context();
 
diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp
index ed41c6fb16892..f6e6272cbcd7b 100644
--- a/flang/lib/Semantics/semantics.cpp
+++ b/flang/lib/Semantics/semantics.cpp
@@ -197,11 +197,11 @@ static void WarnUndefinedFunctionResult(
 
 using StatementSemanticsPass1 = ExprChecker;
 using StatementSemanticsPass2 = SemanticsVisitor<AllocateChecker,
-    ArithmeticIfStmtChecker, AssignmentChecker, CaseChecker, CoarrayChecker,
-    DataChecker, DeallocateChecker, DoForallChecker, IfStmtChecker, IoChecker,
-    MiscChecker, NamelistChecker, NullifyChecker, PurityChecker,
-    ReturnStmtChecker, SelectRankConstructChecker, SelectTypeChecker,
-    StopChecker>;
+    ArithmeticIfStmtChecker, CaseChecker, CoarrayChecker, DataChecker,
+    DeallocateChecker, DoForallChecker, IfStmtChecker, IoChecker, MiscChecker,
+    NamelistChecker, NullifyChecker, PurityChecker, ReturnStmtChecker,
+    SelectRankConstructChecker, SelectTypeChecker, StopChecker>;
+using StatementSemanticsPass3 = SemanticsVisitor<AssignmentChecker>;
 
 static bool PerformStatementSemantics(
     SemanticsContext &context, parser::Program &program) {
@@ -212,6 +212,7 @@ static bool PerformStatementSemantics(
   StatementSemanticsPass1{context}.Walk(program);
   StatementSemanticsPass2 pass2{context};
   pass2.Walk(program);
+  StatementSemanticsPass3{context}.Walk(program);
   if (context.languageFeatures().IsEnabled(common::LanguageFeature::OpenACC)) {
     SemanticsVisitor<AccStructureChecker>{context}.Walk(program);
   }
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index d0032af415604..68a0202f951fe 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -403,3 +403,19 @@ end subroutine
 ! CHECK-LABEL: func.func @_QPsub20()
 ! CHECK-NOT: cuf.data_transfer
 ! CHECK: hlfir.assign
+
+subroutine sub21()
+  real, allocatable,device:: a(:,:), b(:,:)
+  real:: s
+  integer:: i,j,N=16
+  allocate(a(N,N),b(N,N))
+  do concurrent(i=1:N, j=1:N) reduce(+:s)
+    b(i,j)=a(i,j)**2
+    s=s+b(i,j)
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub21()
+! CHECK: fir.do_concurrent.loop
+! CHECK-NOT: cuf.data_transfer
+! CHECK: hlfir.assign
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index 59372a8eb58ed..25d072c93f22c 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -223,6 +223,10 @@ static llvm::cl::opt<bool> enableCUDA("fcuda",
                                       llvm::cl::desc("enable CUDA Fortran"),
                                       llvm::cl::init(false));
 
+static llvm::cl::opt<bool> enableStdPar("stdpar",
+                                        llvm::cl::desc("enable stdpar"),
+                                        llvm::cl::init(false));
+
 static llvm::cl::opt<bool>
     disableCUDAWarpFunction("fcuda-disable-warp-function",
                             llvm::cl::desc("Disable CUDA Warp Function"),
@@ -608,6 +612,10 @@ int main(int argc, char **argv) {
     options.features.Enable(Fortran::common::LanguageFeature::CUDA);
   }
 
+  if (enableStdPar) {
+    options.features.Enable(Fortran::common::LanguageFeature::StdPar);
+  }
+
   if (disableCUDAWarpFunction) {
     options.features.Enable(
         Fortran::common::LanguageFeature::CudaWarpMatchFunction, false);

>From c4844a45b3327af57574a338c280f965b3c4f49f Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 7 Jul 2025 21:35:13 -0700
Subject: [PATCH 2/3] Move check to CUDAChecker

---
 flang/lib/Semantics/assignment.cpp | 70 ------------------------------
 flang/lib/Semantics/assignment.h   | 10 -----
 flang/lib/Semantics/check-cuda.cpp | 64 +++++++++++++++++++++++++--
 flang/lib/Semantics/check-cuda.h   | 10 ++++-
 flang/lib/Semantics/semantics.cpp  | 11 +++--
 5 files changed, 75 insertions(+), 90 deletions(-)

diff --git a/flang/lib/Semantics/assignment.cpp b/flang/lib/Semantics/assignment.cpp
index 8bd123ba1e806..88e08887160d9 100644
--- a/flang/lib/Semantics/assignment.cpp
+++ b/flang/lib/Semantics/assignment.cpp
@@ -42,7 +42,6 @@ class AssignmentContext {
   void Analyze(const parser::AssignmentStmt &);
   void Analyze(const parser::PointerAssignmentStmt &);
   void Analyze(const parser::ConcurrentControl &);
-  int deviceConstructDepth_{0};
   SemanticsContext &context() { return context_; }
 
 private:
@@ -97,21 +96,6 @@ void AssignmentContext::Analyze(const parser::AssignmentStmt &stmt) {
     if (whereDepth_ > 0) {
       CheckShape(lhsLoc, &lhs);
     }
-    if (context_.foldingContext().languageFeatures().IsEnabled(
-            common::LanguageFeature::CUDA)) {
-      const auto &scope{context_.FindScope(lhsLoc)};
-      const Scope &progUnit{GetProgramUnitContaining(scope)};
-      if (!IsCUDADeviceContext(&progUnit) && deviceConstructDepth_ == 0) {
-        if (Fortran::evaluate::HasCUDADeviceAttrs(lhs) &&
-            Fortran::evaluate::HasCUDAImplicitTransfer(rhs)) {
-          if (GetNbOfCUDAManagedOrUnifiedSymbols(lhs) == 1 &&
-              GetNbOfCUDAManagedOrUnifiedSymbols(rhs) == 1 &&
-              GetNbOfCUDADeviceSymbols(rhs) == 1)
-            return; // This is a special case handled on the host.
-          context_.Say(lhsLoc, "Unsupported CUDA data transfer"_err_en_US);
-        }
-      }
-    }
   }
 }
 
@@ -254,60 +238,6 @@ void AssignmentChecker::Enter(const parser::MaskedElsewhereStmt &x) {
 void AssignmentChecker::Leave(const parser::MaskedElsewhereStmt &) {
   context_.value().PopWhereContext();
 }
-void AssignmentChecker::Enter(const parser::CUFKernelDoConstruct &x) {
-  ++context_.value().deviceConstructDepth_;
-}
-void AssignmentChecker::Leave(const parser::CUFKernelDoConstruct &) {
-  --context_.value().deviceConstructDepth_;
-}
-void AssignmentChecker::Enter(const parser::DoConstruct &x) {
-  if (x.IsDoConcurrent() &&
-      context().foldingContext().languageFeatures().IsEnabled(
-          common::LanguageFeature::StdPar)) {
-    ++context_.value().deviceConstructDepth_;
-  }
-}
-void AssignmentChecker::Leave(const parser::DoConstruct &x) {
-  if (x.IsDoConcurrent() &&
-      context().foldingContext().languageFeatures().IsEnabled(
-          common::LanguageFeature::StdPar)) {
-    --context_.value().deviceConstructDepth_;
-  }
-}
-static bool IsOpenACCComputeConstruct(const parser::OpenACCBlockConstruct &x) {
-  const auto &beginBlockDirective =
-      std::get<Fortran::parser::AccBeginBlockDirective>(x.t);
-  const auto &blockDirective =
-      std::get<Fortran::parser::AccBlockDirective>(beginBlockDirective.t);
-  if (blockDirective.v == llvm::acc::ACCD_parallel ||
-      blockDirective.v == llvm::acc::ACCD_serial ||
-      blockDirective.v == llvm::acc::ACCD_kernels) {
-    return true;
-  }
-  return false;
-}
-void AssignmentChecker::Enter(const parser::OpenACCBlockConstruct &x) {
-  if (IsOpenACCComputeConstruct(x)) {
-    ++context_.value().deviceConstructDepth_;
-  }
-}
-void AssignmentChecker::Leave(const parser::OpenACCBlockConstruct &x) {
-  if (IsOpenACCComputeConstruct(x)) {
-    --context_.value().deviceConstructDepth_;
-  }
-}
-void AssignmentChecker::Enter(const parser::OpenACCCombinedConstruct &) {
-  ++context_.value().deviceConstructDepth_;
-}
-void AssignmentChecker::Leave(const parser::OpenACCCombinedConstruct &) {
-  --context_.value().deviceConstructDepth_;
-}
-void AssignmentChecker::Enter(const parser::OpenACCLoopConstruct &) {
-  ++context_.value().deviceConstructDepth_;
-}
-void AssignmentChecker::Leave(const parser::OpenACCLoopConstruct &) {
-  --context_.value().deviceConstructDepth_;
-}
 
 } // namespace Fortran::semantics
 template class Fortran::common::Indirection<
diff --git a/flang/lib/Semantics/assignment.h b/flang/lib/Semantics/assignment.h
index fb703c71c989a..54b3be2da06e8 100644
--- a/flang/lib/Semantics/assignment.h
+++ b/flang/lib/Semantics/assignment.h
@@ -47,16 +47,6 @@ class AssignmentChecker : public virtual BaseChecker {
   void Leave(const parser::EndWhereStmt &);
   void Enter(const parser::MaskedElsewhereStmt &);
   void Leave(const parser::MaskedElsewhereStmt &);
-  void Enter(const parser::CUFKernelDoConstruct &);
-  void Leave(const parser::CUFKernelDoConstruct &);
-  void Enter(const parser::OpenACCBlockConstruct &);
-  void Leave(const parser::OpenACCBlockConstruct &);
-  void Enter(const parser::OpenACCCombinedConstruct &);
-  void Leave(const parser::OpenACCCombinedConstruct &);
-  void Enter(const parser::OpenACCLoopConstruct &);
-  void Leave(const parser::OpenACCLoopConstruct &);
-  void Enter(const parser::DoConstruct &);
-  void Leave(const parser::DoConstruct &);
 
   SemanticsContext &context();
 
diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp
index 8decfb0149829..c399fdb5a48de 100644
--- a/flang/lib/Semantics/check-cuda.cpp
+++ b/flang/lib/Semantics/check-cuda.cpp
@@ -685,18 +685,67 @@ void CUDAChecker::Enter(const parser::CUFKernelDoConstruct &x) {
       std::get<std::list<parser::CUFReduction>>(directive.t)) {
     CheckReduce(context_, reduce);
   }
-  inCUFKernelDoConstruct_ = true;
+  ++deviceConstructDepth_;
+}
+
+static bool IsOpenACCComputeConstruct(const parser::OpenACCBlockConstruct &x) {
+  const auto &beginBlockDirective =
+      std::get<Fortran::parser::AccBeginBlockDirective>(x.t);
+  const auto &blockDirective =
+      std::get<Fortran::parser::AccBlockDirective>(beginBlockDirective.t);
+  if (blockDirective.v == llvm::acc::ACCD_parallel ||
+      blockDirective.v == llvm::acc::ACCD_serial ||
+      blockDirective.v == llvm::acc::ACCD_kernels) {
+    return true;
+  }
+  return false;
 }
 
 void CUDAChecker::Leave(const parser::CUFKernelDoConstruct &) {
-  inCUFKernelDoConstruct_ = false;
+  --deviceConstructDepth_;
+}
+void CUDAChecker::Enter(const parser::OpenACCBlockConstruct &x) {
+  if (IsOpenACCComputeConstruct(x)) {
+    ++deviceConstructDepth_;
+  }
+}
+void CUDAChecker::Leave(const parser::OpenACCBlockConstruct &x) {
+  if (IsOpenACCComputeConstruct(x)) {
+    --deviceConstructDepth_;
+  }
+}
+void CUDAChecker::Enter(const parser::OpenACCCombinedConstruct &) {
+  ++deviceConstructDepth_;
+}
+void CUDAChecker::Leave(const parser::OpenACCCombinedConstruct &) {
+  --deviceConstructDepth_;
+}
+void CUDAChecker::Enter(const parser::OpenACCLoopConstruct &) {
+  ++deviceConstructDepth_;
+}
+void CUDAChecker::Leave(const parser::OpenACCLoopConstruct &) {
+  --deviceConstructDepth_;
+}
+void CUDAChecker::Enter(const parser::DoConstruct &x) {
+  if (x.IsDoConcurrent() &&
+      context_.foldingContext().languageFeatures().IsEnabled(
+          common::LanguageFeature::StdPar)) {
+    ++deviceConstructDepth_;
+  }
+}
+void CUDAChecker::Leave(const parser::DoConstruct &x) {
+  if (x.IsDoConcurrent() &&
+      context_.foldingContext().languageFeatures().IsEnabled(
+          common::LanguageFeature::StdPar)) {
+    --deviceConstructDepth_;
+  }
 }
 
 void CUDAChecker::Enter(const parser::AssignmentStmt &x) {
   auto lhsLoc{std::get<parser::Variable>(x.t).GetSource()};
   const auto &scope{context_.FindScope(lhsLoc)};
   const Scope &progUnit{GetProgramUnitContaining(scope)};
-  if (IsCUDADeviceContext(&progUnit) || inCUFKernelDoConstruct_) {
+  if (IsCUDADeviceContext(&progUnit) || deviceConstructDepth_ > 0) {
     return; // Data transfer with assignment is only perform on host.
   }
 
@@ -714,6 +763,15 @@ void CUDAChecker::Enter(const parser::AssignmentStmt &x) {
     context_.Say(lhsLoc,
         "More than one reference to a CUDA object on the right hand side of the assigment"_err_en_US);
   }
+
+  if (Fortran::evaluate::HasCUDADeviceAttrs(assign->lhs) &&
+      Fortran::evaluate::HasCUDAImplicitTransfer(assign->rhs)) {
+    if (GetNbOfCUDAManagedOrUnifiedSymbols(assign->lhs) == 1 &&
+        GetNbOfCUDAManagedOrUnifiedSymbols(assign->rhs) == 1 &&
+        GetNbOfCUDADeviceSymbols(assign->rhs) == 1)
+      return; // This is a special case handled on the host.
+    context_.Say(lhsLoc, "Unsupported CUDA data transfer"_err_en_US);
+  }
 }
 
 } // namespace Fortran::semantics
diff --git a/flang/lib/Semantics/check-cuda.h b/flang/lib/Semantics/check-cuda.h
index 222a2ee04b57c..10000253ffe5a 100644
--- a/flang/lib/Semantics/check-cuda.h
+++ b/flang/lib/Semantics/check-cuda.h
@@ -41,10 +41,18 @@ class CUDAChecker : public virtual BaseChecker {
   void Enter(const parser::CUFKernelDoConstruct &);
   void Leave(const parser::CUFKernelDoConstruct &);
   void Enter(const parser::AssignmentStmt &);
+  void Enter(const parser::OpenACCBlockConstruct &);
+  void Leave(const parser::OpenACCBlockConstruct &);
+  void Enter(const parser::OpenACCCombinedConstruct &);
+  void Leave(const parser::OpenACCCombinedConstruct &);
+  void Enter(const parser::OpenACCLoopConstruct &);
+  void Leave(const parser::OpenACCLoopConstruct &);
+  void Enter(const parser::DoConstruct &);
+  void Leave(const parser::DoConstruct &);
 
 private:
   SemanticsContext &context_;
-  bool inCUFKernelDoConstruct_ = false;
+  int deviceConstructDepth_{0};
 };
 
 bool CanonicalizeCUDA(parser::Program &);
diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp
index f6e6272cbcd7b..ed41c6fb16892 100644
--- a/flang/lib/Semantics/semantics.cpp
+++ b/flang/lib/Semantics/semantics.cpp
@@ -197,11 +197,11 @@ static void WarnUndefinedFunctionResult(
 
 using StatementSemanticsPass1 = ExprChecker;
 using StatementSemanticsPass2 = SemanticsVisitor<AllocateChecker,
-    ArithmeticIfStmtChecker, CaseChecker, CoarrayChecker, DataChecker,
-    DeallocateChecker, DoForallChecker, IfStmtChecker, IoChecker, MiscChecker,
-    NamelistChecker, NullifyChecker, PurityChecker, ReturnStmtChecker,
-    SelectRankConstructChecker, SelectTypeChecker, StopChecker>;
-using StatementSemanticsPass3 = SemanticsVisitor<AssignmentChecker>;
+    ArithmeticIfStmtChecker, AssignmentChecker, CaseChecker, CoarrayChecker,
+    DataChecker, DeallocateChecker, DoForallChecker, IfStmtChecker, IoChecker,
+    MiscChecker, NamelistChecker, NullifyChecker, PurityChecker,
+    ReturnStmtChecker, SelectRankConstructChecker, SelectTypeChecker,
+    StopChecker>;
 
 static bool PerformStatementSemantics(
     SemanticsContext &context, parser::Program &program) {
@@ -212,7 +212,6 @@ static bool PerformStatementSemantics(
   StatementSemanticsPass1{context}.Walk(program);
   StatementSemanticsPass2 pass2{context};
   pass2.Walk(program);
-  StatementSemanticsPass3{context}.Walk(program);
   if (context.languageFeatures().IsEnabled(common::LanguageFeature::OpenACC)) {
     SemanticsVisitor<AccStructureChecker>{context}.Walk(program);
   }

>From bbdd46de5f2a0fd741ac7cd5ef72e27a15208a77 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 7 Jul 2025 21:38:01 -0700
Subject: [PATCH 3/3] Rename language feature

---
 flang/include/flang/Optimizer/Builder/CUFCommon.h |  3 ++-
 flang/include/flang/Support/Fortran-features.h    |  2 +-
 flang/lib/Lower/Bridge.cpp                        |  5 +++--
 flang/lib/Optimizer/Builder/CUFCommon.cpp         |  6 ++++--
 flang/lib/Semantics/check-cuda.cpp                |  4 ++--
 flang/tools/bbc/bbc.cpp                           | 12 +++++++-----
 6 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/CUFCommon.h b/flang/include/flang/Optimizer/Builder/CUFCommon.h
index cfd8a2305d031..5c56dd6b695f8 100644
--- a/flang/include/flang/Optimizer/Builder/CUFCommon.h
+++ b/flang/include/flang/Optimizer/Builder/CUFCommon.h
@@ -27,7 +27,8 @@ mlir::gpu::GPUModuleOp getOrCreateGPUModule(mlir::ModuleOp mod,
                                             mlir::SymbolTable &symTab);
 
 bool isCUDADeviceContext(mlir::Operation *op);
-bool isCUDADeviceContext(mlir::Region &, bool isStdParEnabled = false);
+bool isCUDADeviceContext(mlir::Region &,
+                         bool isDoConcurrentOffloadEnabled = false);
 bool isRegisteredDeviceGlobal(fir::GlobalOp op);
 bool isRegisteredDeviceAttr(std::optional<cuf::DataAttribute> attr);
 
diff --git a/flang/include/flang/Support/Fortran-features.h b/flang/include/flang/Support/Fortran-features.h
index 393ee7d0687c6..857de9479e4e5 100644
--- a/flang/include/flang/Support/Fortran-features.h
+++ b/flang/include/flang/Support/Fortran-features.h
@@ -55,7 +55,7 @@ ENUM_CLASS(LanguageFeature, BackslashEscapes, OldDebugLines,
     SavedLocalInSpecExpr, PrintNamelist, AssumedRankPassedToNonAssumedRank,
     IgnoreIrrelevantAttributes, Unsigned, AmbiguousStructureConstructor,
     ContiguousOkForSeqAssociation, ForwardRefExplicitTypeDummy,
-    InaccessibleDeferredOverride, CudaWarpMatchFunction, StdPar)
+    InaccessibleDeferredOverride, CudaWarpMatchFunction, DoConcurrentOffload)
 
 // Portability and suspicious usage warnings
 ENUM_CLASS(UsageWarning, Portability, PointerToUndefinable,
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 1be64ea26d7ea..696473605a4e0 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -4887,8 +4887,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     fir::FirOpBuilder &builder = getFirOpBuilder();
 
     bool isInDeviceContext = cuf::isCUDADeviceContext(
-        builder.getRegion(), getFoldingContext().languageFeatures().IsEnabled(
-                                 Fortran::common::LanguageFeature::StdPar));
+        builder.getRegion(),
+        getFoldingContext().languageFeatures().IsEnabled(
+            Fortran::common::LanguageFeature::DoConcurrentOffload));
 
     bool isCUDATransfer =
         IsCUDADataTransfer(assign.lhs, assign.rhs) && !isInDeviceContext;
diff --git a/flang/lib/Optimizer/Builder/CUFCommon.cpp b/flang/lib/Optimizer/Builder/CUFCommon.cpp
index d40ae70503350..dcbf4991907bf 100644
--- a/flang/lib/Optimizer/Builder/CUFCommon.cpp
+++ b/flang/lib/Optimizer/Builder/CUFCommon.cpp
@@ -43,7 +43,8 @@ bool cuf::isCUDADeviceContext(mlir::Operation *op) {
 // for it.
 // If the insertion point is inside an OpenACC region op, it is considered
 // device context.
-bool cuf::isCUDADeviceContext(mlir::Region &region, bool isStdParEnabled) {
+bool cuf::isCUDADeviceContext(mlir::Region &region,
+                              bool isDoConcurrentOffloadEnabled) {
   if (region.getParentOfType<cuf::KernelOp>())
     return true;
   if (region.getParentOfType<mlir::acc::ComputeRegionOpInterface>())
@@ -56,7 +57,8 @@ bool cuf::isCUDADeviceContext(mlir::Region &region, bool isStdParEnabled) {
              cudaProcAttr.getValue() != cuf::ProcAttribute::HostDevice;
     }
   }
-  if (isStdParEnabled && region.getParentOfType<fir::DoConcurrentLoopOp>())
+  if (isDoConcurrentOffloadEnabled &&
+      region.getParentOfType<fir::DoConcurrentLoopOp>())
     return true;
   return false;
 }
diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp
index c399fdb5a48de..8103402598f5f 100644
--- a/flang/lib/Semantics/check-cuda.cpp
+++ b/flang/lib/Semantics/check-cuda.cpp
@@ -729,14 +729,14 @@ void CUDAChecker::Leave(const parser::OpenACCLoopConstruct &) {
 void CUDAChecker::Enter(const parser::DoConstruct &x) {
   if (x.IsDoConcurrent() &&
       context_.foldingContext().languageFeatures().IsEnabled(
-          common::LanguageFeature::StdPar)) {
+          common::LanguageFeature::DoConcurrentOffload)) {
     ++deviceConstructDepth_;
   }
 }
 void CUDAChecker::Leave(const parser::DoConstruct &x) {
   if (x.IsDoConcurrent() &&
       context_.foldingContext().languageFeatures().IsEnabled(
-          common::LanguageFeature::StdPar)) {
+          common::LanguageFeature::DoConcurrentOffload)) {
     --deviceConstructDepth_;
   }
 }
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index 25d072c93f22c..d216069654bc4 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -223,9 +223,10 @@ static llvm::cl::opt<bool> enableCUDA("fcuda",
                                       llvm::cl::desc("enable CUDA Fortran"),
                                       llvm::cl::init(false));
 
-static llvm::cl::opt<bool> enableStdPar("stdpar",
-                                        llvm::cl::desc("enable stdpar"),
-                                        llvm::cl::init(false));
+static llvm::cl::opt<bool>
+    enableDoConcurrentOffload("fdoconcurrent-offload",
+                              llvm::cl::desc("enable do concurrent offload"),
+                              llvm::cl::init(false));
 
 static llvm::cl::opt<bool>
     disableCUDAWarpFunction("fcuda-disable-warp-function",
@@ -612,8 +613,9 @@ int main(int argc, char **argv) {
     options.features.Enable(Fortran::common::LanguageFeature::CUDA);
   }
 
-  if (enableStdPar) {
-    options.features.Enable(Fortran::common::LanguageFeature::StdPar);
+  if (enableDoConcurrentOffload) {
+    options.features.Enable(
+        Fortran::common::LanguageFeature::DoConcurrentOffload);
   }
 
   if (disableCUDAWarpFunction) {



More information about the flang-commits mailing list