[flang-commits] [flang] [flang][openacc] Allow open acc routines from other modules. (PR #136012)

Andre Kuhlenschmidt via flang-commits flang-commits at lists.llvm.org
Fri May 2 11:31:50 PDT 2025


https://github.com/akuhlens updated https://github.com/llvm/llvm-project/pull/136012

>From 0f4591ee621e2e9d7acb0e6066b556cb7e243162 Mon Sep 17 00:00:00 2001
From: Andre Kuhlenschmidt <akuhlenschmi at nvidia.com>
Date: Wed, 16 Apr 2025 12:01:24 -0700
Subject: [PATCH 01/11] initial commit

---
 flang/include/flang/Lower/AbstractConverter.h |   4 +
 flang/include/flang/Lower/OpenACC.h           |  10 +-
 flang/include/flang/Semantics/symbol.h        |  23 +-
 flang/lib/Lower/Bridge.cpp                    |   7 +-
 flang/lib/Lower/CallInterface.cpp             |  10 +
 flang/lib/Lower/OpenACC.cpp                   | 197 ++++++++++++++----
 flang/lib/Semantics/mod-file.cpp              |   1 +
 flang/lib/Semantics/resolve-directives.cpp    |  83 ++++----
 8 files changed, 233 insertions(+), 102 deletions(-)

diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h
index 1d1323642bf9c..59419e829718f 100644
--- a/flang/include/flang/Lower/AbstractConverter.h
+++ b/flang/include/flang/Lower/AbstractConverter.h
@@ -14,6 +14,7 @@
 #define FORTRAN_LOWER_ABSTRACTCONVERTER_H
 
 #include "flang/Lower/LoweringOptions.h"
+#include "flang/Lower/OpenACC.h"
 #include "flang/Lower/PFTDefs.h"
 #include "flang/Optimizer/Builder/BoxValue.h"
 #include "flang/Optimizer/Dialect/FIRAttr.h"
@@ -357,6 +358,9 @@ class AbstractConverter {
   /// functions in order to be in sync).
   virtual mlir::SymbolTable *getMLIRSymbolTable() = 0;
 
+  virtual Fortran::lower::AccRoutineInfoMappingList &
+  getAccDelayedRoutines() = 0;
+
 private:
   /// Options controlling lowering behavior.
   const Fortran::lower::LoweringOptions &loweringOptions;
diff --git a/flang/include/flang/Lower/OpenACC.h b/flang/include/flang/Lower/OpenACC.h
index 0d7038a7fd856..7832e8b69ea23 100644
--- a/flang/include/flang/Lower/OpenACC.h
+++ b/flang/include/flang/Lower/OpenACC.h
@@ -22,6 +22,9 @@ class StringRef;
 } // namespace llvm
 
 namespace mlir {
+namespace func {
+class FuncOp;
+}
 class Location;
 class Type;
 class ModuleOp;
@@ -42,6 +45,7 @@ struct OpenACCRoutineConstruct;
 } // namespace parser
 
 namespace semantics {
+class OpenACCRoutineInfo;
 class SemanticsContext;
 class Symbol;
 } // namespace semantics
@@ -79,8 +83,10 @@ void genOpenACCDeclarativeConstruct(AbstractConverter &,
 void genOpenACCRoutineConstruct(AbstractConverter &,
                                 Fortran::semantics::SemanticsContext &,
                                 mlir::ModuleOp,
-                                const parser::OpenACCRoutineConstruct &,
-                                AccRoutineInfoMappingList &);
+                                const parser::OpenACCRoutineConstruct &);
+void genOpenACCRoutineConstruct(
+    AbstractConverter &, mlir::ModuleOp, mlir::func::FuncOp,
+    const std::vector<Fortran::semantics::OpenACCRoutineInfo> &);
 
 void finalizeOpenACCRoutineAttachment(mlir::ModuleOp,
                                       AccRoutineInfoMappingList &);
diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h
index 715811885c219..1b6b247c9f5bc 100644
--- a/flang/include/flang/Semantics/symbol.h
+++ b/flang/include/flang/Semantics/symbol.h
@@ -127,6 +127,8 @@ class WithBindName {
 // Device type specific OpenACC routine information
 class OpenACCRoutineDeviceTypeInfo {
 public:
+  OpenACCRoutineDeviceTypeInfo(Fortran::common::OpenACCDeviceType dType)
+      : deviceType_{dType} {}
   bool isSeq() const { return isSeq_; }
   void set_isSeq(bool value = true) { isSeq_ = value; }
   bool isVector() const { return isVector_; }
@@ -141,9 +143,7 @@ class OpenACCRoutineDeviceTypeInfo {
     return bindName_ ? &*bindName_ : nullptr;
   }
   void set_bindName(std::string &&name) { bindName_ = std::move(name); }
-  void set_dType(Fortran::common::OpenACCDeviceType dType) {
-    deviceType_ = dType;
-  }
+
   Fortran::common::OpenACCDeviceType dType() const { return deviceType_; }
 
 private:
@@ -162,13 +162,24 @@ class OpenACCRoutineDeviceTypeInfo {
 // in as objects in the OpenACCRoutineDeviceTypeInfo list.
 class OpenACCRoutineInfo : public OpenACCRoutineDeviceTypeInfo {
 public:
+  OpenACCRoutineInfo()
+      : OpenACCRoutineDeviceTypeInfo(Fortran::common::OpenACCDeviceType::None) {
+  }
   bool isNohost() const { return isNohost_; }
   void set_isNohost(bool value = true) { isNohost_ = value; }
-  std::list<OpenACCRoutineDeviceTypeInfo> &deviceTypeInfos() {
+  const std::list<OpenACCRoutineDeviceTypeInfo> &deviceTypeInfos() const {
     return deviceTypeInfos_;
   }
-  void add_deviceTypeInfo(OpenACCRoutineDeviceTypeInfo &info) {
-    deviceTypeInfos_.push_back(info);
+
+  OpenACCRoutineDeviceTypeInfo &add_deviceTypeInfo(
+      Fortran::common::OpenACCDeviceType type) {
+    return add_deviceTypeInfo(OpenACCRoutineDeviceTypeInfo(type));
+  }
+
+  OpenACCRoutineDeviceTypeInfo &add_deviceTypeInfo(
+      OpenACCRoutineDeviceTypeInfo &&info) {
+    deviceTypeInfos_.push_back(std::move(info));
+    return deviceTypeInfos_.back();
   }
 
 private:
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index b4d1197822a43..9285d587585f8 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -443,7 +443,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
                     bridge.getModule(), bridge.getKindMap(), &mlirSymbolTable);
                 Fortran::lower::genOpenACCRoutineConstruct(
                     *this, bridge.getSemanticsContext(), bridge.getModule(),
-                    d.routine, accRoutineInfos);
+                    d.routine);
                 builder = nullptr;
               },
           },
@@ -4287,6 +4287,11 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     return Fortran::lower::createMutableBox(loc, *this, expr, localSymbols);
   }
 
+  Fortran::lower::AccRoutineInfoMappingList &
+  getAccDelayedRoutines() override final {
+    return accRoutineInfos;
+  }
+
   // Create the [newRank] array with the lower bounds to be passed to the
   // runtime as a descriptor.
   mlir::Value createLboundArray(llvm::ArrayRef<mlir::Value> lbounds,
diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp
index 226ba1e52c968..867248f16237e 100644
--- a/flang/lib/Lower/CallInterface.cpp
+++ b/flang/lib/Lower/CallInterface.cpp
@@ -1689,6 +1689,16 @@ class SignatureBuilder
                           "SignatureBuilder should only be used once");
     declare();
     interfaceDetermined = true;
+    if (procDesignator && procDesignator->GetInterfaceSymbol() &&
+        procDesignator->GetInterfaceSymbol()
+            ->has<Fortran::semantics::SubprogramDetails>()) {
+      auto info = procDesignator->GetInterfaceSymbol()
+                      ->get<Fortran::semantics::SubprogramDetails>();
+      if (!info.openACCRoutineInfos().empty()) {
+        genOpenACCRoutineConstruct(converter, converter.getModuleOp(),
+                                   getFuncOp(), info.openACCRoutineInfos());
+      }
+    }
     return getFuncOp();
   }
 
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 3dd35ed9ae481..37b660408af6c 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -38,6 +38,7 @@
 #include "llvm/Frontend/OpenACC/ACC.h.inc"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include <mlir/IR/MLIRContext.h>
 
 #define DEBUG_TYPE "flang-lower-openacc"
 
@@ -4139,11 +4140,152 @@ static void attachRoutineInfo(mlir::func::FuncOp func,
       mlir::acc::RoutineInfoAttr::get(func.getContext(), routines));
 }
 
+void createOpenACCRoutineConstruct(
+    Fortran::lower::AbstractConverter &converter, mlir::Location loc,
+    mlir::ModuleOp mod, mlir::func::FuncOp funcOp, std::string funcName,
+    bool hasNohost, llvm::SmallVector<mlir::Attribute> &bindNames,
+    llvm::SmallVector<mlir::Attribute> &bindNameDeviceTypes,
+    llvm::SmallVector<mlir::Attribute> &gangDeviceTypes,
+    llvm::SmallVector<mlir::Attribute> &gangDimValues,
+    llvm::SmallVector<mlir::Attribute> &gangDimDeviceTypes,
+    llvm::SmallVector<mlir::Attribute> &seqDeviceTypes,
+    llvm::SmallVector<mlir::Attribute> &workerDeviceTypes,
+    llvm::SmallVector<mlir::Attribute> &vectorDeviceTypes) {
+
+  std::stringstream routineOpName;
+  routineOpName << accRoutinePrefix.str() << routineCounter++;
+
+  for (auto routineOp : mod.getOps<mlir::acc::RoutineOp>()) {
+    if (routineOp.getFuncName().str().compare(funcName) == 0) {
+      // If the routine is already specified with the same clauses, just skip
+      // the operation creation.
+      if (compareDeviceTypeInfo(routineOp, bindNames, bindNameDeviceTypes,
+                                gangDeviceTypes, gangDimValues,
+                                gangDimDeviceTypes, seqDeviceTypes,
+                                workerDeviceTypes, vectorDeviceTypes) &&
+          routineOp.getNohost() == hasNohost)
+        return;
+      mlir::emitError(loc, "Routine already specified with different clauses");
+    }
+  }
+  std::string routineOpStr = routineOpName.str();
+  mlir::OpBuilder modBuilder(mod.getBodyRegion());
+  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+  modBuilder.create<mlir::acc::RoutineOp>(
+      loc, routineOpStr, funcName,
+      bindNames.empty() ? nullptr : builder.getArrayAttr(bindNames),
+      bindNameDeviceTypes.empty() ? nullptr
+                                  : builder.getArrayAttr(bindNameDeviceTypes),
+      workerDeviceTypes.empty() ? nullptr
+                                : builder.getArrayAttr(workerDeviceTypes),
+      vectorDeviceTypes.empty() ? nullptr
+                                : builder.getArrayAttr(vectorDeviceTypes),
+      seqDeviceTypes.empty() ? nullptr : builder.getArrayAttr(seqDeviceTypes),
+      hasNohost, /*implicit=*/false,
+      gangDeviceTypes.empty() ? nullptr : builder.getArrayAttr(gangDeviceTypes),
+      gangDimValues.empty() ? nullptr : builder.getArrayAttr(gangDimValues),
+      gangDimDeviceTypes.empty() ? nullptr
+                                 : builder.getArrayAttr(gangDimDeviceTypes));
+
+  if (funcOp)
+    attachRoutineInfo(funcOp, builder.getSymbolRefAttr(routineOpStr));
+  else
+    // FuncOp is not lowered yet. Keep the information so the routine info
+    // can be attached later to the funcOp.
+    converter.getAccDelayedRoutines().push_back(
+        std::make_pair(funcName, builder.getSymbolRefAttr(routineOpStr)));
+}
+
+static void interpretRoutineDeviceInfo(
+    fir::FirOpBuilder &builder,
+    const Fortran::semantics::OpenACCRoutineDeviceTypeInfo &dinfo,
+    llvm::SmallVector<mlir::Attribute> &seqDeviceTypes,
+    llvm::SmallVector<mlir::Attribute> &vectorDeviceTypes,
+    llvm::SmallVector<mlir::Attribute> &workerDeviceTypes,
+    llvm::SmallVector<mlir::Attribute> &bindNameDeviceTypes,
+    llvm::SmallVector<mlir::Attribute> &bindNames,
+    llvm::SmallVector<mlir::Attribute> &gangDeviceTypes,
+    llvm::SmallVector<mlir::Attribute> &gangDimValues,
+    llvm::SmallVector<mlir::Attribute> &gangDimDeviceTypes) {
+  mlir::MLIRContext *context{builder.getContext()};
+  if (dinfo.isSeq()) {
+    seqDeviceTypes.push_back(
+        mlir::acc::DeviceTypeAttr::get(context, getDeviceType(dinfo.dType())));
+  }
+  if (dinfo.isVector()) {
+    vectorDeviceTypes.push_back(
+        mlir::acc::DeviceTypeAttr::get(context, getDeviceType(dinfo.dType())));
+  }
+  if (dinfo.isWorker()) {
+    workerDeviceTypes.push_back(
+        mlir::acc::DeviceTypeAttr::get(context, getDeviceType(dinfo.dType())));
+  }
+  if (dinfo.isGang()) {
+    unsigned gangDim = dinfo.gangDim();
+    auto deviceType =
+        mlir::acc::DeviceTypeAttr::get(context, getDeviceType(dinfo.dType()));
+    if (!gangDim) {
+      gangDeviceTypes.push_back(deviceType);
+    } else {
+      gangDimValues.push_back(
+          builder.getIntegerAttr(builder.getI64Type(), gangDim));
+      gangDimDeviceTypes.push_back(deviceType);
+    }
+  }
+  if (const std::string *bindName{dinfo.bindName()}) {
+    bindNames.push_back(builder.getStringAttr(*bindName));
+    bindNameDeviceTypes.push_back(
+        mlir::acc::DeviceTypeAttr::get(context, getDeviceType(dinfo.dType())));
+  }
+}
+
+void Fortran::lower::genOpenACCRoutineConstruct(
+    Fortran::lower::AbstractConverter &converter, mlir::ModuleOp mod,
+    mlir::func::FuncOp funcOp,
+    const std::vector<Fortran::semantics::OpenACCRoutineInfo> &routineInfos) {
+  CHECK(funcOp && "Expected a valid function operation");
+  fir::FirOpBuilder &builder{converter.getFirOpBuilder()};
+  mlir::Location loc{funcOp.getLoc()};
+  std::string funcName{funcOp.getName()};
+
+  // Collect the routine clauses
+  bool hasNohost{false};
+
+  llvm::SmallVector<mlir::Attribute> seqDeviceTypes, vectorDeviceTypes,
+      workerDeviceTypes, bindNameDeviceTypes, bindNames, gangDeviceTypes,
+      gangDimDeviceTypes, gangDimValues;
+
+  for (const Fortran::semantics::OpenACCRoutineInfo &info : routineInfos) {
+    // Device Independent Attributes
+    if (info.isNohost()) {
+      hasNohost = true;
+    }
+    // Note: Device Independent Attributes are set to the
+    // none device type in `info`.
+    interpretRoutineDeviceInfo(builder, info, seqDeviceTypes, vectorDeviceTypes,
+                               workerDeviceTypes, bindNameDeviceTypes,
+                               bindNames, gangDeviceTypes, gangDimValues,
+                               gangDimDeviceTypes);
+
+    // Device Dependent Attributes
+    for (const Fortran::semantics::OpenACCRoutineDeviceTypeInfo &dinfo :
+         info.deviceTypeInfos()) {
+      interpretRoutineDeviceInfo(
+          builder, dinfo, seqDeviceTypes, vectorDeviceTypes, workerDeviceTypes,
+          bindNameDeviceTypes, bindNames, gangDeviceTypes, gangDimValues,
+          gangDimDeviceTypes);
+    }
+  }
+  createOpenACCRoutineConstruct(
+      converter, loc, mod, funcOp, funcName, hasNohost, bindNames,
+      bindNameDeviceTypes, gangDeviceTypes, gangDimValues, gangDimDeviceTypes,
+      seqDeviceTypes, workerDeviceTypes, vectorDeviceTypes);
+}
+
 void Fortran::lower::genOpenACCRoutineConstruct(
     Fortran::lower::AbstractConverter &converter,
     Fortran::semantics::SemanticsContext &semanticsContext, mlir::ModuleOp mod,
-    const Fortran::parser::OpenACCRoutineConstruct &routineConstruct,
-    Fortran::lower::AccRoutineInfoMappingList &accRoutineInfos) {
+    const Fortran::parser::OpenACCRoutineConstruct &routineConstruct) {
   fir::FirOpBuilder &builder = converter.getFirOpBuilder();
   mlir::Location loc = converter.genLocation(routineConstruct.source);
   std::optional<Fortran::parser::Name> name =
@@ -4174,6 +4316,7 @@ void Fortran::lower::genOpenACCRoutineConstruct(
       funcName = funcOp.getName();
     }
   }
+  // TODO: Refactor this to use the OpenACCRoutineInfo
   bool hasNohost = false;
 
   llvm::SmallVector<mlir::Attribute> seqDeviceTypes, vectorDeviceTypes,
@@ -4226,6 +4369,8 @@ void Fortran::lower::genOpenACCRoutineConstruct(
                    std::get_if<Fortran::parser::AccClause::Bind>(&clause.u)) {
       if (const auto *name =
               std::get_if<Fortran::parser::Name>(&bindClause->v.u)) {
+        // FIXME: This case mangles the name, the one below does not.
+        // which is correct?
         mlir::Attribute bindNameAttr =
             builder.getStringAttr(converter.mangleName(*name->symbol));
         for (auto crtDeviceTypeAttr : crtDeviceTypes) {
@@ -4255,47 +4400,10 @@ void Fortran::lower::genOpenACCRoutineConstruct(
     }
   }
 
-  mlir::OpBuilder modBuilder(mod.getBodyRegion());
-  std::stringstream routineOpName;
-  routineOpName << accRoutinePrefix.str() << routineCounter++;
-
-  for (auto routineOp : mod.getOps<mlir::acc::RoutineOp>()) {
-    if (routineOp.getFuncName().str().compare(funcName) == 0) {
-      // If the routine is already specified with the same clauses, just skip
-      // the operation creation.
-      if (compareDeviceTypeInfo(routineOp, bindNames, bindNameDeviceTypes,
-                                gangDeviceTypes, gangDimValues,
-                                gangDimDeviceTypes, seqDeviceTypes,
-                                workerDeviceTypes, vectorDeviceTypes) &&
-          routineOp.getNohost() == hasNohost)
-        return;
-      mlir::emitError(loc, "Routine already specified with different clauses");
-    }
-  }
-
-  modBuilder.create<mlir::acc::RoutineOp>(
-      loc, routineOpName.str(), funcName,
-      bindNames.empty() ? nullptr : builder.getArrayAttr(bindNames),
-      bindNameDeviceTypes.empty() ? nullptr
-                                  : builder.getArrayAttr(bindNameDeviceTypes),
-      workerDeviceTypes.empty() ? nullptr
-                                : builder.getArrayAttr(workerDeviceTypes),
-      vectorDeviceTypes.empty() ? nullptr
-                                : builder.getArrayAttr(vectorDeviceTypes),
-      seqDeviceTypes.empty() ? nullptr : builder.getArrayAttr(seqDeviceTypes),
-      hasNohost, /*implicit=*/false,
-      gangDeviceTypes.empty() ? nullptr : builder.getArrayAttr(gangDeviceTypes),
-      gangDimValues.empty() ? nullptr : builder.getArrayAttr(gangDimValues),
-      gangDimDeviceTypes.empty() ? nullptr
-                                 : builder.getArrayAttr(gangDimDeviceTypes));
-
-  if (funcOp)
-    attachRoutineInfo(funcOp, builder.getSymbolRefAttr(routineOpName.str()));
-  else
-    // FuncOp is not lowered yet. Keep the information so the routine info
-    // can be attached later to the funcOp.
-    accRoutineInfos.push_back(std::make_pair(
-        funcName, builder.getSymbolRefAttr(routineOpName.str())));
+  createOpenACCRoutineConstruct(
+      converter, loc, mod, funcOp, funcName, hasNohost, bindNames,
+      bindNameDeviceTypes, gangDeviceTypes, gangDimValues, gangDimDeviceTypes,
+      seqDeviceTypes, workerDeviceTypes, vectorDeviceTypes);
 }
 
 void Fortran::lower::finalizeOpenACCRoutineAttachment(
@@ -4443,8 +4551,7 @@ void Fortran::lower::genOpenACCDeclarativeConstruct(
             fir::FirOpBuilder &builder = converter.getFirOpBuilder();
             mlir::ModuleOp mod = builder.getModule();
             Fortran::lower::genOpenACCRoutineConstruct(
-                converter, semanticsContext, mod, routineConstruct,
-                accRoutineInfos);
+                converter, semanticsContext, mod, routineConstruct);
           },
       },
       accDeclConstruct.u);
diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp
index ee356e56e4458..befd204a671fc 100644
--- a/flang/lib/Semantics/mod-file.cpp
+++ b/flang/lib/Semantics/mod-file.cpp
@@ -1387,6 +1387,7 @@ Scope *ModFileReader::Read(SourceName name, std::optional<bool> isIntrinsic,
   parser::Options options;
   options.isModuleFile = true;
   options.features.Enable(common::LanguageFeature::BackslashEscapes);
+  options.features.Enable(common::LanguageFeature::OpenACC);
   options.features.Enable(common::LanguageFeature::OpenMP);
   options.features.Enable(common::LanguageFeature::CUDA);
   if (!isIntrinsic.value_or(false) && !notAModule) {
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index d75b4ea13d35f..93c334a3ca3cb 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -1034,61 +1034,53 @@ void AccAttributeVisitor::AddRoutineInfoToSymbol(
     Symbol &symbol, const parser::OpenACCRoutineConstruct &x) {
   if (symbol.has<SubprogramDetails>()) {
     Fortran::semantics::OpenACCRoutineInfo info;
+    std::vector<OpenACCRoutineDeviceTypeInfo *> currentDevices;
+    currentDevices.push_back(&info);
     const auto &clauses = std::get<Fortran::parser::AccClauseList>(x.t);
     for (const Fortran::parser::AccClause &clause : clauses.v) {
-      if (std::get_if<Fortran::parser::AccClause::Seq>(&clause.u)) {
-        if (info.deviceTypeInfos().empty()) {
-          info.set_isSeq();
-        } else {
-          info.deviceTypeInfos().back().set_isSeq();
+      if (const auto *dTypeClause =
+              std::get_if<Fortran::parser::AccClause::DeviceType>(&clause.u)) {
+        currentDevices.clear();
+        for (const auto &deviceTypeExpr : dTypeClause->v.v) {
+          currentDevices.push_back(&info.add_deviceTypeInfo(deviceTypeExpr.v));
         }
+      } else if (std::get_if<Fortran::parser::AccClause::Nohost>(&clause.u)) {
+        info.set_isNohost();
+      } else if (std::get_if<Fortran::parser::AccClause::Seq>(&clause.u)) {
+        for (auto &device : currentDevices)
+          device->set_isSeq();
+      } else if (std::get_if<Fortran::parser::AccClause::Vector>(&clause.u)) {
+        for (auto &device : currentDevices)
+          device->set_isVector();
+      } else if (std::get_if<Fortran::parser::AccClause::Worker>(&clause.u)) {
+        for (auto &device : currentDevices)
+          device->set_isWorker();
       } else if (const auto *gangClause =
                      std::get_if<Fortran::parser::AccClause::Gang>(&clause.u)) {
-        if (info.deviceTypeInfos().empty()) {
-          info.set_isGang();
-        } else {
-          info.deviceTypeInfos().back().set_isGang();
-        }
+        for (auto &device : currentDevices)
+          device->set_isGang();
         if (gangClause->v) {
           const Fortran::parser::AccGangArgList &x = *gangClause->v;
+          int numArgs{0};
           for (const Fortran::parser::AccGangArg &gangArg : x.v) {
+            CHECK(numArgs <= 1 && "expecting 0 or 1 gang dim args");
             if (const auto *dim =
                     std::get_if<Fortran::parser::AccGangArg::Dim>(&gangArg.u)) {
               if (const auto v{EvaluateInt64(context_, dim->v)}) {
-                if (info.deviceTypeInfos().empty()) {
-                  info.set_gangDim(*v);
-                } else {
-                  info.deviceTypeInfos().back().set_gangDim(*v);
-                }
+                for (auto &device : currentDevices)
+                  device->set_gangDim(*v);
               }
             }
+            numArgs++;
           }
         }
-      } else if (std::get_if<Fortran::parser::AccClause::Vector>(&clause.u)) {
-        if (info.deviceTypeInfos().empty()) {
-          info.set_isVector();
-        } else {
-          info.deviceTypeInfos().back().set_isVector();
-        }
-      } else if (std::get_if<Fortran::parser::AccClause::Worker>(&clause.u)) {
-        if (info.deviceTypeInfos().empty()) {
-          info.set_isWorker();
-        } else {
-          info.deviceTypeInfos().back().set_isWorker();
-        }
-      } else if (std::get_if<Fortran::parser::AccClause::Nohost>(&clause.u)) {
-        info.set_isNohost();
       } else if (const auto *bindClause =
                      std::get_if<Fortran::parser::AccClause::Bind>(&clause.u)) {
+        std::string bindName = "";
         if (const auto *name =
                 std::get_if<Fortran::parser::Name>(&bindClause->v.u)) {
           if (Symbol *sym = ResolveFctName(*name)) {
-            if (info.deviceTypeInfos().empty()) {
-              info.set_bindName(sym->name().ToString());
-            } else {
-              info.deviceTypeInfos().back().set_bindName(
-                  sym->name().ToString());
-            }
+            bindName = sym->name().ToString();
           } else {
             context_.Say((*name).source,
                 "No function or subroutine declared for '%s'"_err_en_US,
@@ -1101,21 +1093,16 @@ void AccAttributeVisitor::AddRoutineInfoToSymbol(
               Fortran::parser::Unwrap<Fortran::parser::CharLiteralConstant>(
                   *charExpr);
           std::string str{std::get<std::string>(charConst->t)};
-          std::stringstream bindName;
-          bindName << "\"" << str << "\"";
-          if (info.deviceTypeInfos().empty()) {
-            info.set_bindName(bindName.str());
-          } else {
-            info.deviceTypeInfos().back().set_bindName(bindName.str());
+          std::stringstream bindNameStream;
+          bindNameStream << "\"" << str << "\"";
+          bindName = bindNameStream.str();
+        }
+        if (!bindName.empty()) {
+          // Fixme: do we need to ensure there there is only one device?
+          for (auto &device : currentDevices) {
+            device->set_bindName(std::string(bindName));
           }
         }
-      } else if (const auto *dType =
-                     std::get_if<Fortran::parser::AccClause::DeviceType>(
-                         &clause.u)) {
-        const parser::AccDeviceTypeExprList &deviceTypeExprList = dType->v;
-        OpenACCRoutineDeviceTypeInfo dtypeInfo;
-        dtypeInfo.set_dType(deviceTypeExprList.v.front().v);
-        info.add_deviceTypeInfo(dtypeInfo);
       }
     }
     symbol.get<SubprogramDetails>().add_openACCRoutineInfo(info);

>From 1b6da293788edc56eea566f5c15126de6955169c Mon Sep 17 00:00:00 2001
From: Andre Kuhlenschmidt <akuhlenschmi at nvidia.com>
Date: Tue, 22 Apr 2025 16:06:33 -0700
Subject: [PATCH 02/11] fix includes

---
 flang/lib/Lower/OpenACC.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 37b660408af6c..a3ebd9b931dc6 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -32,13 +32,13 @@
 #include "flang/Semantics/expression.h"
 #include "flang/Semantics/scope.h"
 #include "flang/Semantics/tools.h"
-#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
-#include "mlir/Support/LLVM.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Frontend/OpenACC/ACC.h.inc"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include <mlir/IR/MLIRContext.h>
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/Support/LLVM.h"
 
 #define DEBUG_TYPE "flang-lower-openacc"
 

>From 7b65ac4c477e5e46bf369a3a9f94f69cf496ef6b Mon Sep 17 00:00:00 2001
From: Andre Kuhlenschmidt <akuhlenschmi at nvidia.com>
Date: Wed, 23 Apr 2025 13:50:19 -0700
Subject: [PATCH 03/11] adding test

---
 flang/include/flang/Semantics/symbol.h        |  7 +++++-
 flang/lib/Semantics/resolve-directives.cpp    |  6 ++++-
 .../Lower/OpenACC/acc-module-definition.f90   | 17 ++++++++++++++
 .../Lower/OpenACC/acc-routine-use-module.f90  | 23 +++++++++++++++++++
 4 files changed, 51 insertions(+), 2 deletions(-)
 create mode 100644 flang/test/Lower/OpenACC/acc-module-definition.f90
 create mode 100644 flang/test/Lower/OpenACC/acc-routine-use-module.f90

diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h
index 1b6b247c9f5bc..fe6c73997733a 100644
--- a/flang/include/flang/Semantics/symbol.h
+++ b/flang/include/flang/Semantics/symbol.h
@@ -142,7 +142,11 @@ class OpenACCRoutineDeviceTypeInfo {
   const std::string *bindName() const {
     return bindName_ ? &*bindName_ : nullptr;
   }
-  void set_bindName(std::string &&name) { bindName_ = std::move(name); }
+  bool bindNameIsInternal() const {return bindNameIsInternal_;}
+  void set_bindName(std::string &&name, bool isInternal=false) { 
+    bindName_ = std::move(name); 
+    bindNameIsInternal_ = isInternal;
+  }
 
   Fortran::common::OpenACCDeviceType dType() const { return deviceType_; }
 
@@ -153,6 +157,7 @@ class OpenACCRoutineDeviceTypeInfo {
   bool isGang_{false};
   unsigned gangDim_{0};
   std::optional<std::string> bindName_;
+  bool bindNameIsInternal_{false};
   Fortran::common::OpenACCDeviceType deviceType_{
       Fortran::common::OpenACCDeviceType::None};
 };
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 93c334a3ca3cb..8fb3559c34426 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -1077,10 +1077,12 @@ void AccAttributeVisitor::AddRoutineInfoToSymbol(
       } else if (const auto *bindClause =
                      std::get_if<Fortran::parser::AccClause::Bind>(&clause.u)) {
         std::string bindName = "";
+        bool isInternal = false;
         if (const auto *name =
                 std::get_if<Fortran::parser::Name>(&bindClause->v.u)) {
           if (Symbol *sym = ResolveFctName(*name)) {
             bindName = sym->name().ToString();
+            isInternal = true;
           } else {
             context_.Say((*name).source,
                 "No function or subroutine declared for '%s'"_err_en_US,
@@ -1100,12 +1102,14 @@ void AccAttributeVisitor::AddRoutineInfoToSymbol(
         if (!bindName.empty()) {
           // Fixme: do we need to ensure there there is only one device?
           for (auto &device : currentDevices) {
-            device->set_bindName(std::string(bindName));
+            device->set_bindName(std::string(bindName), isInternal);
           }
         }
       }
     }
     symbol.get<SubprogramDetails>().add_openACCRoutineInfo(info);
+  } else {
+    llvm::errs() << "Couldnot add routine info to symbol: " << symbol.name() << "\n";
   }
 }
 
diff --git a/flang/test/Lower/OpenACC/acc-module-definition.f90 b/flang/test/Lower/OpenACC/acc-module-definition.f90
new file mode 100644
index 0000000000000..36e41fc631c77
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-module-definition.f90
@@ -0,0 +1,17 @@
+! RUN: rm -fr %t && mkdir -p %t && cd %t
+! RUN: bbc -fopenacc -emit-fir %s
+! RUN: cat mod1.mod | FileCheck %s
+
+!CHECK-LABEL: module mod1
+module mod1
+    contains
+      !CHECK subroutine callee(aa)
+      subroutine callee(aa)
+      !CHECK: !$acc routine seq
+      !$acc routine seq
+        integer :: aa
+        aa = 1
+      end subroutine
+      !CHECK: end
+      !CHECK: end
+end module
\ No newline at end of file
diff --git a/flang/test/Lower/OpenACC/acc-routine-use-module.f90 b/flang/test/Lower/OpenACC/acc-routine-use-module.f90
new file mode 100644
index 0000000000000..7fc96b0ef5684
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-routine-use-module.f90
@@ -0,0 +1,23 @@
+! RUN: rm -fr %t && mkdir -p %t && cd %t
+! RUN: bbc -emit-fir %S/acc-module-definition.f90
+! RUN: bbc -emit-fir %s -o - | FileCheck %s
+
+! This test module is based off of flang/test/Lower/use_module.f90
+! The first runs ensures the module file is generated.
+
+module use_mod1
+  use mod1
+  contains
+    !CHECK: func.func @_QMuse_mod1Pcaller
+    !CHECK-SAME {
+    subroutine caller(aa)
+      integer :: aa
+      !$acc serial
+      !CHECK: fir.call @_QMmod1Pcallee
+      call callee(aa)
+      !$acc end serial
+    end subroutine
+    !CHECK: }
+    !CHECK: acc.routine @acc_routine_0 func(@_QMmod1Pcallee) seq
+    !CHECK: func.func private @_QMmod1Pcallee(!fir.ref<i32>) attributes {acc.routine_info = #acc.routine_info<[@acc_routine_0]>}
+end module
\ No newline at end of file

>From 70f8d469346d22597c7b3ff38b2f4a84a82b6d85 Mon Sep 17 00:00:00 2001
From: Andre Kuhlenschmidt <akuhlenschmi at nvidia.com>
Date: Fri, 25 Apr 2025 14:39:04 -0700
Subject: [PATCH 04/11] debugging failure

---
 flang/include/flang/Lower/OpenACC.h           |  7 ++
 flang/include/flang/Semantics/symbol.h        | 21 +++--
 flang/lib/Lower/Bridge.cpp                    | 21 +++--
 flang/lib/Lower/CallInterface.cpp             | 21 ++---
 flang/lib/Lower/OpenACC.cpp                   | 87 +++++++++++--------
 flang/lib/Semantics/mod-file.cpp              | 11 ++-
 flang/lib/Semantics/resolve-directives.cpp    | 16 ++--
 flang/lib/Semantics/symbol.cpp                | 46 ++++++++++
 .../test/Lower/OpenACC/acc-routine-named.f90  | 10 ++-
 .../Lower/OpenACC/acc-routine-use-module.f90  |  6 +-
 flang/test/Lower/OpenACC/acc-routine.f90      | 63 ++++++++------
 11 files changed, 199 insertions(+), 110 deletions(-)

diff --git a/flang/include/flang/Lower/OpenACC.h b/flang/include/flang/Lower/OpenACC.h
index 7832e8b69ea23..dc014a71526c3 100644
--- a/flang/include/flang/Lower/OpenACC.h
+++ b/flang/include/flang/Lower/OpenACC.h
@@ -37,11 +37,16 @@ class FirOpBuilder;
 }
 
 namespace Fortran {
+namespace evaluate {
+class ProcedureDesignator;
+} // namespace evaluate
+
 namespace parser {
 struct AccClauseList;
 struct OpenACCConstruct;
 struct OpenACCDeclarativeConstruct;
 struct OpenACCRoutineConstruct;
+struct ProcedureDesignator;
 } // namespace parser
 
 namespace semantics {
@@ -71,6 +76,8 @@ static constexpr llvm::StringRef declarePostDeallocSuffix =
 
 static constexpr llvm::StringRef privatizationRecipePrefix = "privatization";
 
+bool needsOpenACCRoutineConstruct(const Fortran::evaluate::ProcedureDesignator *);
+
 mlir::Value genOpenACCConstruct(AbstractConverter &,
                                 Fortran::semantics::SemanticsContext &,
                                 pft::Evaluation &,
diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h
index fe6c73997733a..8c60a196bdfc1 100644
--- a/flang/include/flang/Semantics/symbol.h
+++ b/flang/include/flang/Semantics/symbol.h
@@ -22,6 +22,7 @@
 #include <list>
 #include <optional>
 #include <set>
+#include <variant>
 #include <vector>
 
 namespace llvm {
@@ -139,25 +140,26 @@ class OpenACCRoutineDeviceTypeInfo {
   void set_isGang(bool value = true) { isGang_ = value; }
   unsigned gangDim() const { return gangDim_; }
   void set_gangDim(unsigned value) { gangDim_ = value; }
-  const std::string *bindName() const {
-    return bindName_ ? &*bindName_ : nullptr;
+  const std::variant<std::string, SymbolRef> *bindName() const {
+    return bindName_.has_value() ? &*bindName_ : nullptr;
   }
-  bool bindNameIsInternal() const {return bindNameIsInternal_;}
-  void set_bindName(std::string &&name, bool isInternal=false) { 
-    bindName_ = std::move(name); 
-    bindNameIsInternal_ = isInternal;
+  const std::optional<std::variant<std::string, SymbolRef>> &bindNameOpt() const {
+    return bindName_;
   }
+  void set_bindName(std::string &&name) { bindName_.emplace(std::move(name)); }
+  void set_bindName(SymbolRef symbol) { bindName_.emplace(symbol); }
 
   Fortran::common::OpenACCDeviceType dType() const { return deviceType_; }
 
+  friend llvm::raw_ostream &operator<<(
+      llvm::raw_ostream &, const OpenACCRoutineDeviceTypeInfo &);
 private:
   bool isSeq_{false};
   bool isVector_{false};
   bool isWorker_{false};
   bool isGang_{false};
   unsigned gangDim_{0};
-  std::optional<std::string> bindName_;
-  bool bindNameIsInternal_{false};
+  std::optional<std::variant<std::string, SymbolRef>> bindName_;
   Fortran::common::OpenACCDeviceType deviceType_{
       Fortran::common::OpenACCDeviceType::None};
 };
@@ -187,6 +189,9 @@ class OpenACCRoutineInfo : public OpenACCRoutineDeviceTypeInfo {
     return deviceTypeInfos_.back();
   }
 
+  friend llvm::raw_ostream &operator<<(
+      llvm::raw_ostream &, const OpenACCRoutineInfo &);
+
 private:
   std::list<OpenACCRoutineDeviceTypeInfo> deviceTypeInfos_;
   bool isNohost_{false};
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 9285d587585f8..abe07bcfdfcda 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -438,14 +438,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
               [&](Fortran::lower::pft::ModuleLikeUnit &m) { lowerMod(m); },
               [&](Fortran::lower::pft::BlockDataUnit &b) {},
               [&](Fortran::lower::pft::CompilerDirectiveUnit &d) {},
-              [&](Fortran::lower::pft::OpenACCDirectiveUnit &d) {
-                builder = new fir::FirOpBuilder(
-                    bridge.getModule(), bridge.getKindMap(), &mlirSymbolTable);
-                Fortran::lower::genOpenACCRoutineConstruct(
-                    *this, bridge.getSemanticsContext(), bridge.getModule(),
-                    d.routine);
-                builder = nullptr;
-              },
+              [&](Fortran::lower::pft::OpenACCDirectiveUnit &d) {},
           },
           u);
     }
@@ -472,6 +465,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
   /// Declare a function.
   void declareFunction(Fortran::lower::pft::FunctionLikeUnit &funit) {
     setCurrentPosition(funit.getStartingSourceLoc());
+    builder = new fir::FirOpBuilder(
+        bridge.getModule(), bridge.getKindMap(), &mlirSymbolTable);
     for (int entryIndex = 0, last = funit.entryPointList.size();
          entryIndex < last; ++entryIndex) {
       funit.setActiveEntry(entryIndex);
@@ -498,6 +493,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     for (Fortran::lower::pft::ContainedUnit &unit : funit.containedUnitList)
       if (auto *f = std::get_if<Fortran::lower::pft::FunctionLikeUnit>(&unit))
         declareFunction(*f);
+    builder = nullptr;
   }
 
   /// Get the scope that is defining or using \p sym. The returned scope is not
@@ -1035,7 +1031,10 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     return bridge.getSemanticsContext().FindScope(currentPosition);
   }
 
-  fir::FirOpBuilder &getFirOpBuilder() override final { return *builder; }
+  fir::FirOpBuilder &getFirOpBuilder() override final { 
+    CHECK(builder && "builder is not set before calling getFirOpBuilder");
+    return *builder; 
+  }
 
   mlir::ModuleOp getModuleOp() override final { return bridge.getModule(); }
 
@@ -5617,6 +5616,10 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     LLVM_DEBUG(llvm::dbgs() << "\n[bridge - startNewFunction]";
                if (auto *sym = scope.symbol()) llvm::dbgs() << " " << *sym;
                llvm::dbgs() << "\n");
+    // I don't think setting the builder is necessary here, because callee 
+    // always looks up the FuncOp from the module. If there was a function that
+    // was not declared yet. This call to callee will cause an assertion
+    //failure.
     Fortran::lower::CalleeInterface callee(funit, *this);
     mlir::func::FuncOp func = callee.addEntryBlockAndMapArguments();
     builder =
diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp
index 867248f16237e..b938354e6bcb3 100644
--- a/flang/lib/Lower/CallInterface.cpp
+++ b/flang/lib/Lower/CallInterface.cpp
@@ -10,6 +10,7 @@
 #include "flang/Evaluate/fold.h"
 #include "flang/Lower/Bridge.h"
 #include "flang/Lower/Mangler.h"
+#include "flang/Lower/OpenACC.h"
 #include "flang/Lower/PFTBuilder.h"
 #include "flang/Lower/StatementContext.h"
 #include "flang/Lower/Support/Utils.h"
@@ -20,6 +21,7 @@
 #include "flang/Optimizer/Dialect/FIROpsSupport.h"
 #include "flang/Optimizer/Support/InternalNames.h"
 #include "flang/Optimizer/Support/Utils.h"
+#include "flang/Parser/parse-tree.h"
 #include "flang/Semantics/symbol.h"
 #include "flang/Semantics/tools.h"
 #include "flang/Support/Fortran.h"
@@ -715,6 +717,14 @@ void Fortran::lower::CallInterface<T>::declare() {
           func.setArgAttrs(placeHolder.index(), placeHolder.value().attributes);
 
       setCUDAAttributes(func, side().getProcedureSymbol(), characteristic);
+      
+      if (const Fortran::semantics::Symbol *sym = side().getProcedureSymbol()) {
+        if (const auto &info{sym->GetUltimate().detailsIf<Fortran::semantics::SubprogramDetails>()}) {
+          if (!info->openACCRoutineInfos().empty()) {
+            genOpenACCRoutineConstruct(converter, module, func, info->openACCRoutineInfos());
+          }
+        }
+      }
     }
   }
 }
@@ -1688,17 +1698,8 @@ class SignatureBuilder
       fir::emitFatalError(converter.getCurrentLocation(),
                           "SignatureBuilder should only be used once");
     declare();
+
     interfaceDetermined = true;
-    if (procDesignator && procDesignator->GetInterfaceSymbol() &&
-        procDesignator->GetInterfaceSymbol()
-            ->has<Fortran::semantics::SubprogramDetails>()) {
-      auto info = procDesignator->GetInterfaceSymbol()
-                      ->get<Fortran::semantics::SubprogramDetails>();
-      if (!info.openACCRoutineInfos().empty()) {
-        genOpenACCRoutineConstruct(converter, converter.getModuleOp(),
-                                   getFuncOp(), info.openACCRoutineInfos());
-      }
-    }
     return getFuncOp();
   }
 
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index a3ebd9b931dc6..eefa8fbf12b1a 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -36,6 +36,7 @@
 #include "llvm/Frontend/OpenACC/ACC.h.inc"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/IR/MLIRContext.h"
 #include "mlir/Support/LLVM.h"
@@ -4140,6 +4141,14 @@ static void attachRoutineInfo(mlir::func::FuncOp func,
       mlir::acc::RoutineInfoAttr::get(func.getContext(), routines));
 }
 
+static mlir::ArrayAttr getArrayAttrOrNull(fir::FirOpBuilder &builder, llvm::SmallVector<mlir::Attribute> &attributes) {
+  if (attributes.empty()) {
+    return nullptr;
+  } else {
+    return builder.getArrayAttr(attributes);
+  }
+}
+
 void createOpenACCRoutineConstruct(
     Fortran::lower::AbstractConverter &converter, mlir::Location loc,
     mlir::ModuleOp mod, mlir::func::FuncOp funcOp, std::string funcName,
@@ -4173,31 +4182,29 @@ void createOpenACCRoutineConstruct(
   fir::FirOpBuilder &builder = converter.getFirOpBuilder();
   modBuilder.create<mlir::acc::RoutineOp>(
       loc, routineOpStr, funcName,
-      bindNames.empty() ? nullptr : builder.getArrayAttr(bindNames),
-      bindNameDeviceTypes.empty() ? nullptr
-                                  : builder.getArrayAttr(bindNameDeviceTypes),
-      workerDeviceTypes.empty() ? nullptr
-                                : builder.getArrayAttr(workerDeviceTypes),
-      vectorDeviceTypes.empty() ? nullptr
-                                : builder.getArrayAttr(vectorDeviceTypes),
-      seqDeviceTypes.empty() ? nullptr : builder.getArrayAttr(seqDeviceTypes),
+      getArrayAttrOrNull(builder, bindNames),
+      getArrayAttrOrNull(builder, bindNameDeviceTypes),
+      getArrayAttrOrNull(builder, workerDeviceTypes),
+      getArrayAttrOrNull(builder, vectorDeviceTypes),
+      getArrayAttrOrNull(builder, seqDeviceTypes),
       hasNohost, /*implicit=*/false,
-      gangDeviceTypes.empty() ? nullptr : builder.getArrayAttr(gangDeviceTypes),
-      gangDimValues.empty() ? nullptr : builder.getArrayAttr(gangDimValues),
-      gangDimDeviceTypes.empty() ? nullptr
-                                 : builder.getArrayAttr(gangDimDeviceTypes));
-
-  if (funcOp)
-    attachRoutineInfo(funcOp, builder.getSymbolRefAttr(routineOpStr));
-  else
+      getArrayAttrOrNull(builder, gangDeviceTypes),
+      getArrayAttrOrNull(builder, gangDimValues),
+      getArrayAttrOrNull(builder, gangDimDeviceTypes));
+
+  auto symbolRefAttr = builder.getSymbolRefAttr(routineOpStr);
+  if (funcOp) {
+    
+    attachRoutineInfo(funcOp, symbolRefAttr);
+  } else {
     // FuncOp is not lowered yet. Keep the information so the routine info
     // can be attached later to the funcOp.
-    converter.getAccDelayedRoutines().push_back(
-        std::make_pair(funcName, builder.getSymbolRefAttr(routineOpStr)));
+    converter.getAccDelayedRoutines().push_back(std::make_pair(funcName, symbolRefAttr));
+  }
 }
 
 static void interpretRoutineDeviceInfo(
-    fir::FirOpBuilder &builder,
+    Fortran::lower::AbstractConverter &converter,
     const Fortran::semantics::OpenACCRoutineDeviceTypeInfo &dinfo,
     llvm::SmallVector<mlir::Attribute> &seqDeviceTypes,
     llvm::SmallVector<mlir::Attribute> &vectorDeviceTypes,
@@ -4207,23 +4214,24 @@ static void interpretRoutineDeviceInfo(
     llvm::SmallVector<mlir::Attribute> &gangDeviceTypes,
     llvm::SmallVector<mlir::Attribute> &gangDimValues,
     llvm::SmallVector<mlir::Attribute> &gangDimDeviceTypes) {
-  mlir::MLIRContext *context{builder.getContext()};
+  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+  auto getDeviceTypeAttr = [&]() -> mlir::Attribute {
+    auto context = builder.getContext();
+    auto value = getDeviceType(dinfo.dType());
+    return mlir::acc::DeviceTypeAttr::get(context, value );
+  };
   if (dinfo.isSeq()) {
-    seqDeviceTypes.push_back(
-        mlir::acc::DeviceTypeAttr::get(context, getDeviceType(dinfo.dType())));
+    seqDeviceTypes.push_back(getDeviceTypeAttr());
   }
   if (dinfo.isVector()) {
-    vectorDeviceTypes.push_back(
-        mlir::acc::DeviceTypeAttr::get(context, getDeviceType(dinfo.dType())));
+    vectorDeviceTypes.push_back(getDeviceTypeAttr());
   }
   if (dinfo.isWorker()) {
-    workerDeviceTypes.push_back(
-        mlir::acc::DeviceTypeAttr::get(context, getDeviceType(dinfo.dType())));
+    workerDeviceTypes.push_back(getDeviceTypeAttr());
   }
   if (dinfo.isGang()) {
     unsigned gangDim = dinfo.gangDim();
-    auto deviceType =
-        mlir::acc::DeviceTypeAttr::get(context, getDeviceType(dinfo.dType()));
+    auto deviceType = getDeviceTypeAttr();
     if (!gangDim) {
       gangDeviceTypes.push_back(deviceType);
     } else {
@@ -4232,10 +4240,18 @@ static void interpretRoutineDeviceInfo(
       gangDimDeviceTypes.push_back(deviceType);
     }
   }
-  if (const std::string *bindName{dinfo.bindName()}) {
-    bindNames.push_back(builder.getStringAttr(*bindName));
-    bindNameDeviceTypes.push_back(
-        mlir::acc::DeviceTypeAttr::get(context, getDeviceType(dinfo.dType())));
+  if (dinfo.bindNameOpt().has_value()) {
+    const auto &bindName = dinfo.bindNameOpt().value();
+    mlir::Attribute bindNameAttr;
+    if (const auto &bindStr{std::get_if<std::string>(&bindName)}) {
+        bindNameAttr = builder.getStringAttr(*bindStr);
+      } else if (const auto &bindSym{std::get_if<Fortran::semantics::SymbolRef>(&bindName)}) {
+        bindNameAttr = builder.getStringAttr(converter.mangleName(*bindSym));
+      } else {
+        llvm_unreachable("Unsupported bind name type");     
+      }
+      bindNames.push_back(bindNameAttr);
+      bindNameDeviceTypes.push_back(getDeviceTypeAttr());
   }
 }
 
@@ -4244,7 +4260,6 @@ void Fortran::lower::genOpenACCRoutineConstruct(
     mlir::func::FuncOp funcOp,
     const std::vector<Fortran::semantics::OpenACCRoutineInfo> &routineInfos) {
   CHECK(funcOp && "Expected a valid function operation");
-  fir::FirOpBuilder &builder{converter.getFirOpBuilder()};
   mlir::Location loc{funcOp.getLoc()};
   std::string funcName{funcOp.getName()};
 
@@ -4262,7 +4277,7 @@ void Fortran::lower::genOpenACCRoutineConstruct(
     }
     // Note: Device Independent Attributes are set to the
     // none device type in `info`.
-    interpretRoutineDeviceInfo(builder, info, seqDeviceTypes, vectorDeviceTypes,
+    interpretRoutineDeviceInfo(converter, info, seqDeviceTypes, vectorDeviceTypes,
                                workerDeviceTypes, bindNameDeviceTypes,
                                bindNames, gangDeviceTypes, gangDimValues,
                                gangDimDeviceTypes);
@@ -4271,7 +4286,7 @@ void Fortran::lower::genOpenACCRoutineConstruct(
     for (const Fortran::semantics::OpenACCRoutineDeviceTypeInfo &dinfo :
          info.deviceTypeInfos()) {
       interpretRoutineDeviceInfo(
-          builder, dinfo, seqDeviceTypes, vectorDeviceTypes, workerDeviceTypes,
+          converter, dinfo, seqDeviceTypes, vectorDeviceTypes, workerDeviceTypes,
           bindNameDeviceTypes, bindNames, gangDeviceTypes, gangDimValues,
           gangDimDeviceTypes);
     }
@@ -4369,8 +4384,6 @@ void Fortran::lower::genOpenACCRoutineConstruct(
                    std::get_if<Fortran::parser::AccClause::Bind>(&clause.u)) {
       if (const auto *name =
               std::get_if<Fortran::parser::Name>(&bindClause->v.u)) {
-        // FIXME: This case mangles the name, the one below does not.
-        // which is correct?
         mlir::Attribute bindNameAttr =
             builder.getStringAttr(converter.mangleName(*name->symbol));
         for (auto crtDeviceTypeAttr : crtDeviceTypes) {
diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp
index befd204a671fc..76dc8db590f22 100644
--- a/flang/lib/Semantics/mod-file.cpp
+++ b/flang/lib/Semantics/mod-file.cpp
@@ -24,6 +24,7 @@
 #include <fstream>
 #include <set>
 #include <string_view>
+#include <variant>
 #include <vector>
 
 namespace Fortran::semantics {
@@ -638,8 +639,14 @@ static void PutOpenACCDeviceTypeRoutineInfo(
   if (info.isWorker()) {
     os << " worker";
   }
-  if (info.bindName()) {
-    os << " bind(" << *info.bindName() << ")";
+  if (const std::variant<std::string, SymbolRef> *bindName{info.bindName()}) {
+    os << " bind(";
+    if (std::holds_alternative<std::string>(*bindName)) {
+      os << "\"" << std::get<std::string>(*bindName) << "\"";
+    } else {
+      os << std::get<SymbolRef>(*bindName)->name();
+    }
+    os << ")";
   }
 }
 
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 8fb3559c34426..a8f00b546306e 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -1076,13 +1076,13 @@ void AccAttributeVisitor::AddRoutineInfoToSymbol(
         }
       } else if (const auto *bindClause =
                      std::get_if<Fortran::parser::AccClause::Bind>(&clause.u)) {
-        std::string bindName = "";
-        bool isInternal = false;
         if (const auto *name =
                 std::get_if<Fortran::parser::Name>(&bindClause->v.u)) {
           if (Symbol *sym = ResolveFctName(*name)) {
-            bindName = sym->name().ToString();
-            isInternal = true;
+            Symbol &ultimate{sym->GetUltimate()};
+            for (auto &device : currentDevices) {
+              device->set_bindName(SymbolRef(ultimate));
+            }
           } else {
             context_.Say((*name).source,
                 "No function or subroutine declared for '%s'"_err_en_US,
@@ -1095,14 +1095,8 @@ void AccAttributeVisitor::AddRoutineInfoToSymbol(
               Fortran::parser::Unwrap<Fortran::parser::CharLiteralConstant>(
                   *charExpr);
           std::string str{std::get<std::string>(charConst->t)};
-          std::stringstream bindNameStream;
-          bindNameStream << "\"" << str << "\"";
-          bindName = bindNameStream.str();
-        }
-        if (!bindName.empty()) {
-          // Fixme: do we need to ensure there there is only one device?
           for (auto &device : currentDevices) {
-            device->set_bindName(std::string(bindName), isInternal);
+            device->set_bindName(std::string(str));
           }
         }
       }
diff --git a/flang/lib/Semantics/symbol.cpp b/flang/lib/Semantics/symbol.cpp
index 32eb6c2c5a188..d44df4669fa36 100644
--- a/flang/lib/Semantics/symbol.cpp
+++ b/flang/lib/Semantics/symbol.cpp
@@ -144,6 +144,52 @@ llvm::raw_ostream &operator<<(
       os << ' ' << x;
     }
   }
+  if (!x.openACCRoutineInfos_.empty()) {
+    os << " openACCRoutineInfos:";
+    for (const auto x : x.openACCRoutineInfos_) {
+      os << x;
+    }
+  }
+  return os;
+}
+
+llvm::raw_ostream &operator<<(
+    llvm::raw_ostream &os, const OpenACCRoutineDeviceTypeInfo &x) {
+  if (x.dType() != common::OpenACCDeviceType::None) {
+    os << " deviceType(" << common::EnumToString(x.dType()) << ')';
+  }
+  if (x.isSeq()) {
+    os << " seq";
+  }
+  if (x.isVector()) {
+    os << " vector";
+  }
+  if (x.isWorker()) {
+    os << " worker";
+  }
+  if (x.isGang()) {
+    os << " gang(" << x.gangDim() << ')';
+  }
+  if (const auto *bindName{x.bindName()}) {
+    if (const auto &symbol{std::get_if<std::string>(bindName)}) {
+      os << " bindName(\"" << *symbol << "\")";
+    } else {
+      const SymbolRef s{std::get<SymbolRef>(*bindName)};
+      os << " bindName(" << s->name() << ")";
+    }
+  }
+  return os;
+}
+
+llvm::raw_ostream &operator<<(
+    llvm::raw_ostream &os, const OpenACCRoutineInfo &x) {
+  if (x.isNohost()) {
+    os << " nohost";
+  }
+  os << static_cast<const OpenACCRoutineDeviceTypeInfo &>(x);
+  for (const auto &d : x.deviceTypeInfos_) {
+    os << d;
+  }
   return os;
 }
 
diff --git a/flang/test/Lower/OpenACC/acc-routine-named.f90 b/flang/test/Lower/OpenACC/acc-routine-named.f90
index 2cf6bf8b2bc06..de9784a1146cc 100644
--- a/flang/test/Lower/OpenACC/acc-routine-named.f90
+++ b/flang/test/Lower/OpenACC/acc-routine-named.f90
@@ -4,8 +4,8 @@
 
 module acc_routines
 
-! CHECK: acc.routine @acc_routine_1 func(@_QMacc_routinesPacc2)
-! CHECK: acc.routine @acc_routine_0 func(@_QMacc_routinesPacc1) seq
+! CHECK: acc.routine @[[r0:.*]] func(@_QMacc_routinesPacc2)
+! CHECK: acc.routine @[[r1:.*]] func(@_QMacc_routinesPacc1) seq
 
 !$acc routine(acc1) seq
 
@@ -14,12 +14,14 @@ module acc_routines
   subroutine acc1()
   end subroutine
 
-! CHECK-LABEL: func.func @_QMacc_routinesPacc1() attributes {acc.routine_info = #acc.routine_info<[@acc_routine_0]>}
+! CHECK-LABEL: func.func @_QMacc_routinesPacc1() 
+! CHECK-SAME:attributes {acc.routine_info = #acc.routine_info<[@[[r1]]]>}
 
   subroutine acc2()
     !$acc routine(acc2)
   end subroutine
 
-! CHECK-LABEL: func.func @_QMacc_routinesPacc2() attributes {acc.routine_info = #acc.routine_info<[@acc_routine_1]>}
+! CHECK-LABEL: func.func @_QMacc_routinesPacc2()
+! CHECK-SAME:attributes {acc.routine_info = #acc.routine_info<[@[[r0]]]>}
 
 end module
diff --git a/flang/test/Lower/OpenACC/acc-routine-use-module.f90 b/flang/test/Lower/OpenACC/acc-routine-use-module.f90
index 7fc96b0ef5684..059324230a746 100644
--- a/flang/test/Lower/OpenACC/acc-routine-use-module.f90
+++ b/flang/test/Lower/OpenACC/acc-routine-use-module.f90
@@ -1,6 +1,6 @@
 ! RUN: rm -fr %t && mkdir -p %t && cd %t
-! RUN: bbc -emit-fir %S/acc-module-definition.f90
-! RUN: bbc -emit-fir %s -o - | FileCheck %s
+! RUN: bbc -fopenacc -emit-fir %S/acc-module-definition.f90
+! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s
 
 ! This test module is based off of flang/test/Lower/use_module.f90
 ! The first runs ensures the module file is generated.
@@ -8,6 +8,7 @@
 module use_mod1
   use mod1
   contains
+    !CHECK: acc.routine @acc_routine_0 func(@_QMmod1Pcallee) seq
     !CHECK: func.func @_QMuse_mod1Pcaller
     !CHECK-SAME {
     subroutine caller(aa)
@@ -18,6 +19,5 @@ subroutine caller(aa)
       !$acc end serial
     end subroutine
     !CHECK: }
-    !CHECK: acc.routine @acc_routine_0 func(@_QMmod1Pcallee) seq
     !CHECK: func.func private @_QMmod1Pcallee(!fir.ref<i32>) attributes {acc.routine_info = #acc.routine_info<[@acc_routine_0]>}
 end module
\ No newline at end of file
diff --git a/flang/test/Lower/OpenACC/acc-routine.f90 b/flang/test/Lower/OpenACC/acc-routine.f90
index 1170af18bc334..789f3a57e1f79 100644
--- a/flang/test/Lower/OpenACC/acc-routine.f90
+++ b/flang/test/Lower/OpenACC/acc-routine.f90
@@ -2,69 +2,77 @@
 
 ! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
 
-! CHECK: acc.routine @acc_routine_17 func(@_QPacc_routine19) bind("_QPacc_routine17" [#acc.device_type<host>], "_QPacc_routine17" [#acc.device_type<default>], "_QPacc_routine16" [#acc.device_type<multicore>])
-! CHECK: acc.routine @acc_routine_16 func(@_QPacc_routine18) bind("_QPacc_routine17" [#acc.device_type<host>], "_QPacc_routine16" [#acc.device_type<multicore>])
-! CHECK: acc.routine @acc_routine_15 func(@_QPacc_routine17) worker ([#acc.device_type<host>]) vector ([#acc.device_type<multicore>])
-! CHECK: acc.routine @acc_routine_14 func(@_QPacc_routine16) gang([#acc.device_type<nvidia>]) seq ([#acc.device_type<host>])
-! CHECK: acc.routine @acc_routine_10 func(@_QPacc_routine11) seq
-! CHECK: acc.routine @acc_routine_9 func(@_QPacc_routine10) seq
-! CHECK: acc.routine @acc_routine_8 func(@_QPacc_routine9) bind("_QPacc_routine9a")
-! CHECK: acc.routine @acc_routine_7 func(@_QPacc_routine8) bind("routine8_")
-! CHECK: acc.routine @acc_routine_6 func(@_QPacc_routine7) gang(dim: 1 : i64)
-! CHECK: acc.routine @acc_routine_5 func(@_QPacc_routine6) nohost
-! CHECK: acc.routine @acc_routine_4 func(@_QPacc_routine5) worker
-! CHECK: acc.routine @acc_routine_3 func(@_QPacc_routine4) vector
-! CHECK: acc.routine @acc_routine_2 func(@_QPacc_routine3) gang
-! CHECK: acc.routine @acc_routine_1 func(@_QPacc_routine2) seq
-! CHECK: acc.routine @acc_routine_0 func(@_QPacc_routine1)
+! CHECK: acc.routine @[[r14:.*]] func(@_QPacc_routine19) bind("_QPacc_routine17" [#acc.device_type<host>], "_QPacc_routine17" [#acc.device_type<default>], "_QPacc_routine16" [#acc.device_type<multicore>])
+! CHECK: acc.routine @[[r13:.*]] func(@_QPacc_routine18) bind("_QPacc_routine17" [#acc.device_type<host>], "_QPacc_routine16" [#acc.device_type<multicore>])
+! CHECK: acc.routine @[[r12:.*]] func(@_QPacc_routine17) worker ([#acc.device_type<host>]) vector ([#acc.device_type<multicore>])
+! CHECK: acc.routine @[[r11:.*]] func(@_QPacc_routine16) gang([#acc.device_type<nvidia>]) seq ([#acc.device_type<host>])
+! CHECK: acc.routine @[[r10:.*]] func(@_QPacc_routine11) seq
+! CHECK: acc.routine @[[r09:.*]] func(@_QPacc_routine10) seq
+! CHECK: acc.routine @[[r08:.*]] func(@_QPacc_routine9) bind("_QPacc_routine9a")
+! CHECK: acc.routine @[[r07:.*]] func(@_QPacc_routine8) bind("routine8_")
+! CHECK: acc.routine @[[r06:.*]] func(@_QPacc_routine7) gang(dim: 1 : i64)
+! CHECK: acc.routine @[[r05:.*]] func(@_QPacc_routine6) nohost
+! CHECK: acc.routine @[[r04:.*]] func(@_QPacc_routine5) worker
+! CHECK: acc.routine @[[r03:.*]] func(@_QPacc_routine4) vector
+! CHECK: acc.routine @[[r02:.*]] func(@_QPacc_routine3) gang
+! CHECK: acc.routine @[[r01:.*]] func(@_QPacc_routine2) seq
+! CHECK: acc.routine @[[r00:.*]] func(@_QPacc_routine1)
 
 subroutine acc_routine1()
   !$acc routine
 end subroutine
 
-! CHECK-LABEL: func.func @_QPacc_routine1() attributes {acc.routine_info = #acc.routine_info<[@acc_routine_0]>}
+! CHECK-LABEL: func.func @_QPacc_routine1()
+! CHECK-SAME: attributes {acc.routine_info = #acc.routine_info<[@[[r00]]]>}
 
 subroutine acc_routine2()
   !$acc routine seq
 end subroutine
 
-! CHECK-LABEL: func.func @_QPacc_routine2() attributes {acc.routine_info = #acc.routine_info<[@acc_routine_1]>}
+! CHECK-LABEL: func.func @_QPacc_routine2()
+! CHECK-SAME: attributes {acc.routine_info = #acc.routine_info<[@[[r01]]]>}
 
 subroutine acc_routine3()
   !$acc routine gang
 end subroutine
 
-! CHECK-LABEL: func.func @_QPacc_routine3() attributes {acc.routine_info = #acc.routine_info<[@acc_routine_2]>}
+! CHECK-LABEL: func.func @_QPacc_routine3()
+! CHECK-SAME: attributes {acc.routine_info = #acc.routine_info<[@[[r02]]]>}
 
 subroutine acc_routine4()
   !$acc routine vector
 end subroutine
 
-! CHECK-LABEL: func.func @_QPacc_routine4() attributes {acc.routine_info = #acc.routine_info<[@acc_routine_3]>}
+! CHECK-LABEL: func.func @_QPacc_routine4()
+! CHECK-SAME: attributes {acc.routine_info = #acc.routine_info<[@[[r03]]]>}
 
 subroutine acc_routine5()
   !$acc routine worker
 end subroutine
 
-! CHECK-LABEL: func.func @_QPacc_routine5() attributes {acc.routine_info = #acc.routine_info<[@acc_routine_4]>}
+! CHECK-LABEL: func.func @_QPacc_routine5()
+! CHECK-SAME: attributes {acc.routine_info = #acc.routine_info<[@[[r04]]]>}
 
 subroutine acc_routine6()
   !$acc routine nohost
 end subroutine
 
-! CHECK-LABEL: func.func @_QPacc_routine6() attributes {acc.routine_info = #acc.routine_info<[@acc_routine_5]>}
+! CHECK-LABEL: func.func @_QPacc_routine6()
+! CHECK-SAME: attributes {acc.routine_info = #acc.routine_info<[@[[r05]]]>}
 
 subroutine acc_routine7()
   !$acc routine gang(dim:1)
 end subroutine
 
-! CHECK-LABEL: func.func @_QPacc_routine7() attributes {acc.routine_info = #acc.routine_info<[@acc_routine_6]>}
+! CHECK-LABEL: func.func @_QPacc_routine7()
+! CHECK-SAME: attributes {acc.routine_info = #acc.routine_info<[@[[r06]]]>}
 
 subroutine acc_routine8()
   !$acc routine bind("routine8_")
 end subroutine
 
-! CHECK-LABEL: func.func @_QPacc_routine8() attributes {acc.routine_info = #acc.routine_info<[@acc_routine_7]>}
+! CHECK-LABEL: func.func @_QPacc_routine8()
+! CHECK-SAME: attributes {acc.routine_info = #acc.routine_info<[@[[r07]]]>}
 
 subroutine acc_routine9a()
 end subroutine
@@ -73,20 +81,23 @@ subroutine acc_routine9()
   !$acc routine bind(acc_routine9a)
 end subroutine
 
-! CHECK-LABEL: func.func @_QPacc_routine9() attributes {acc.routine_info = #acc.routine_info<[@acc_routine_8]>}
+! CHECK-LABEL: func.func @_QPacc_routine9()
+! CHECK-SAME: attributes {acc.routine_info = #acc.routine_info<[@[[r08]]]>}
 
 function acc_routine10()
   !$acc routine(acc_routine10) seq
 end function
 
-! CHECK-LABEL: func.func @_QPacc_routine10() -> f32 attributes {acc.routine_info = #acc.routine_info<[@acc_routine_9]>}
+! CHECK-LABEL: func.func @_QPacc_routine10() -> f32
+! CHECK-SAME: attributes {acc.routine_info = #acc.routine_info<[@[[r09]]]>}
 
 subroutine acc_routine11(a)
   real :: a
   !$acc routine(acc_routine11) seq
 end subroutine
 
-! CHECK-LABEL: func.func @_QPacc_routine11(%arg0: !fir.ref<f32> {fir.bindc_name = "a"}) attributes {acc.routine_info = #acc.routine_info<[@acc_routine_10]>}
+! CHECK-LABEL: func.func @_QPacc_routine11(%arg0: !fir.ref<f32> {fir.bindc_name = "a"})
+! CHECK-SAME: attributes {acc.routine_info = #acc.routine_info<[@[[r10]]]>}
 
 subroutine acc_routine12()
 

>From e2d1a05d2de2356644d385e9099a7e6879143cc7 Mon Sep 17 00:00:00 2001
From: Andre Kuhlenschmidt <akuhlenschmi at nvidia.com>
Date: Fri, 25 Apr 2025 14:40:14 -0700
Subject: [PATCH 05/11] clang-format

---
 flang/include/flang/Lower/OpenACC.h        |  3 +-
 flang/include/flang/Semantics/symbol.h     |  4 +-
 flang/lib/Lower/Bridge.cpp                 | 12 ++---
 flang/lib/Lower/CallInterface.cpp          |  9 ++--
 flang/lib/Lower/OpenACC.cpp                | 56 +++++++++++-----------
 flang/lib/Semantics/resolve-directives.cpp |  3 +-
 6 files changed, 48 insertions(+), 39 deletions(-)

diff --git a/flang/include/flang/Lower/OpenACC.h b/flang/include/flang/Lower/OpenACC.h
index dc014a71526c3..35a33e751b52b 100644
--- a/flang/include/flang/Lower/OpenACC.h
+++ b/flang/include/flang/Lower/OpenACC.h
@@ -76,7 +76,8 @@ static constexpr llvm::StringRef declarePostDeallocSuffix =
 
 static constexpr llvm::StringRef privatizationRecipePrefix = "privatization";
 
-bool needsOpenACCRoutineConstruct(const Fortran::evaluate::ProcedureDesignator *);
+bool needsOpenACCRoutineConstruct(
+    const Fortran::evaluate::ProcedureDesignator *);
 
 mlir::Value genOpenACCConstruct(AbstractConverter &,
                                 Fortran::semantics::SemanticsContext &,
diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h
index 8c60a196bdfc1..eb34aac9c390d 100644
--- a/flang/include/flang/Semantics/symbol.h
+++ b/flang/include/flang/Semantics/symbol.h
@@ -143,7 +143,8 @@ class OpenACCRoutineDeviceTypeInfo {
   const std::variant<std::string, SymbolRef> *bindName() const {
     return bindName_.has_value() ? &*bindName_ : nullptr;
   }
-  const std::optional<std::variant<std::string, SymbolRef>> &bindNameOpt() const {
+  const std::optional<std::variant<std::string, SymbolRef>> &
+  bindNameOpt() const {
     return bindName_;
   }
   void set_bindName(std::string &&name) { bindName_.emplace(std::move(name)); }
@@ -153,6 +154,7 @@ class OpenACCRoutineDeviceTypeInfo {
 
   friend llvm::raw_ostream &operator<<(
       llvm::raw_ostream &, const OpenACCRoutineDeviceTypeInfo &);
+
 private:
   bool isSeq_{false};
   bool isVector_{false};
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index abe07bcfdfcda..5e7b783323bfd 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -465,8 +465,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
   /// Declare a function.
   void declareFunction(Fortran::lower::pft::FunctionLikeUnit &funit) {
     setCurrentPosition(funit.getStartingSourceLoc());
-    builder = new fir::FirOpBuilder(
-        bridge.getModule(), bridge.getKindMap(), &mlirSymbolTable);
+    builder = new fir::FirOpBuilder(bridge.getModule(), bridge.getKindMap(),
+                                    &mlirSymbolTable);
     for (int entryIndex = 0, last = funit.entryPointList.size();
          entryIndex < last; ++entryIndex) {
       funit.setActiveEntry(entryIndex);
@@ -1031,9 +1031,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     return bridge.getSemanticsContext().FindScope(currentPosition);
   }
 
-  fir::FirOpBuilder &getFirOpBuilder() override final { 
+  fir::FirOpBuilder &getFirOpBuilder() override final {
     CHECK(builder && "builder is not set before calling getFirOpBuilder");
-    return *builder; 
+    return *builder;
   }
 
   mlir::ModuleOp getModuleOp() override final { return bridge.getModule(); }
@@ -5616,10 +5616,10 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     LLVM_DEBUG(llvm::dbgs() << "\n[bridge - startNewFunction]";
                if (auto *sym = scope.symbol()) llvm::dbgs() << " " << *sym;
                llvm::dbgs() << "\n");
-    // I don't think setting the builder is necessary here, because callee 
+    // I don't think setting the builder is necessary here, because callee
     // always looks up the FuncOp from the module. If there was a function that
     // was not declared yet. This call to callee will cause an assertion
-    //failure.
+    // failure.
     Fortran::lower::CalleeInterface callee(funit, *this);
     mlir::func::FuncOp func = callee.addEntryBlockAndMapArguments();
     builder =
diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp
index b938354e6bcb3..611eacfe178e5 100644
--- a/flang/lib/Lower/CallInterface.cpp
+++ b/flang/lib/Lower/CallInterface.cpp
@@ -717,11 +717,14 @@ void Fortran::lower::CallInterface<T>::declare() {
           func.setArgAttrs(placeHolder.index(), placeHolder.value().attributes);
 
       setCUDAAttributes(func, side().getProcedureSymbol(), characteristic);
-      
+
       if (const Fortran::semantics::Symbol *sym = side().getProcedureSymbol()) {
-        if (const auto &info{sym->GetUltimate().detailsIf<Fortran::semantics::SubprogramDetails>()}) {
+        if (const auto &info{
+                sym->GetUltimate()
+                    .detailsIf<Fortran::semantics::SubprogramDetails>()}) {
           if (!info->openACCRoutineInfos().empty()) {
-            genOpenACCRoutineConstruct(converter, module, func, info->openACCRoutineInfos());
+            genOpenACCRoutineConstruct(converter, module, func,
+                                       info->openACCRoutineInfos());
           }
         }
       }
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index eefa8fbf12b1a..891dc998bc596 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -32,14 +32,14 @@
 #include "flang/Semantics/expression.h"
 #include "flang/Semantics/scope.h"
 #include "flang/Semantics/tools.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/Support/LLVM.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Frontend/OpenACC/ACC.h.inc"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
-#include "mlir/IR/MLIRContext.h"
-#include "mlir/Support/LLVM.h"
 
 #define DEBUG_TYPE "flang-lower-openacc"
 
@@ -4141,7 +4141,9 @@ static void attachRoutineInfo(mlir::func::FuncOp func,
       mlir::acc::RoutineInfoAttr::get(func.getContext(), routines));
 }
 
-static mlir::ArrayAttr getArrayAttrOrNull(fir::FirOpBuilder &builder, llvm::SmallVector<mlir::Attribute> &attributes) {
+static mlir::ArrayAttr
+getArrayAttrOrNull(fir::FirOpBuilder &builder,
+                   llvm::SmallVector<mlir::Attribute> &attributes) {
   if (attributes.empty()) {
     return nullptr;
   } else {
@@ -4181,25 +4183,24 @@ void createOpenACCRoutineConstruct(
   mlir::OpBuilder modBuilder(mod.getBodyRegion());
   fir::FirOpBuilder &builder = converter.getFirOpBuilder();
   modBuilder.create<mlir::acc::RoutineOp>(
-      loc, routineOpStr, funcName,
-      getArrayAttrOrNull(builder, bindNames),
+      loc, routineOpStr, funcName, getArrayAttrOrNull(builder, bindNames),
       getArrayAttrOrNull(builder, bindNameDeviceTypes),
       getArrayAttrOrNull(builder, workerDeviceTypes),
       getArrayAttrOrNull(builder, vectorDeviceTypes),
-      getArrayAttrOrNull(builder, seqDeviceTypes),
-      hasNohost, /*implicit=*/false,
-      getArrayAttrOrNull(builder, gangDeviceTypes),
+      getArrayAttrOrNull(builder, seqDeviceTypes), hasNohost,
+      /*implicit=*/false, getArrayAttrOrNull(builder, gangDeviceTypes),
       getArrayAttrOrNull(builder, gangDimValues),
       getArrayAttrOrNull(builder, gangDimDeviceTypes));
 
   auto symbolRefAttr = builder.getSymbolRefAttr(routineOpStr);
   if (funcOp) {
-    
+
     attachRoutineInfo(funcOp, symbolRefAttr);
   } else {
     // FuncOp is not lowered yet. Keep the information so the routine info
     // can be attached later to the funcOp.
-    converter.getAccDelayedRoutines().push_back(std::make_pair(funcName, symbolRefAttr));
+    converter.getAccDelayedRoutines().push_back(
+        std::make_pair(funcName, symbolRefAttr));
   }
 }
 
@@ -4218,7 +4219,7 @@ static void interpretRoutineDeviceInfo(
   auto getDeviceTypeAttr = [&]() -> mlir::Attribute {
     auto context = builder.getContext();
     auto value = getDeviceType(dinfo.dType());
-    return mlir::acc::DeviceTypeAttr::get(context, value );
+    return mlir::acc::DeviceTypeAttr::get(context, value);
   };
   if (dinfo.isSeq()) {
     seqDeviceTypes.push_back(getDeviceTypeAttr());
@@ -4244,14 +4245,15 @@ static void interpretRoutineDeviceInfo(
     const auto &bindName = dinfo.bindNameOpt().value();
     mlir::Attribute bindNameAttr;
     if (const auto &bindStr{std::get_if<std::string>(&bindName)}) {
-        bindNameAttr = builder.getStringAttr(*bindStr);
-      } else if (const auto &bindSym{std::get_if<Fortran::semantics::SymbolRef>(&bindName)}) {
-        bindNameAttr = builder.getStringAttr(converter.mangleName(*bindSym));
-      } else {
-        llvm_unreachable("Unsupported bind name type");     
-      }
-      bindNames.push_back(bindNameAttr);
-      bindNameDeviceTypes.push_back(getDeviceTypeAttr());
+      bindNameAttr = builder.getStringAttr(*bindStr);
+    } else if (const auto &bindSym{
+                   std::get_if<Fortran::semantics::SymbolRef>(&bindName)}) {
+      bindNameAttr = builder.getStringAttr(converter.mangleName(*bindSym));
+    } else {
+      llvm_unreachable("Unsupported bind name type");
+    }
+    bindNames.push_back(bindNameAttr);
+    bindNameDeviceTypes.push_back(getDeviceTypeAttr());
   }
 }
 
@@ -4277,18 +4279,18 @@ void Fortran::lower::genOpenACCRoutineConstruct(
     }
     // Note: Device Independent Attributes are set to the
     // none device type in `info`.
-    interpretRoutineDeviceInfo(converter, info, seqDeviceTypes, vectorDeviceTypes,
-                               workerDeviceTypes, bindNameDeviceTypes,
-                               bindNames, gangDeviceTypes, gangDimValues,
-                               gangDimDeviceTypes);
+    interpretRoutineDeviceInfo(converter, info, seqDeviceTypes,
+                               vectorDeviceTypes, workerDeviceTypes,
+                               bindNameDeviceTypes, bindNames, gangDeviceTypes,
+                               gangDimValues, gangDimDeviceTypes);
 
     // Device Dependent Attributes
     for (const Fortran::semantics::OpenACCRoutineDeviceTypeInfo &dinfo :
          info.deviceTypeInfos()) {
       interpretRoutineDeviceInfo(
-          converter, dinfo, seqDeviceTypes, vectorDeviceTypes, workerDeviceTypes,
-          bindNameDeviceTypes, bindNames, gangDeviceTypes, gangDimValues,
-          gangDimDeviceTypes);
+          converter, dinfo, seqDeviceTypes, vectorDeviceTypes,
+          workerDeviceTypes, bindNameDeviceTypes, bindNames, gangDeviceTypes,
+          gangDimValues, gangDimDeviceTypes);
     }
   }
   createOpenACCRoutineConstruct(
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index a8f00b546306e..c2df7cddc0025 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -1103,7 +1103,8 @@ void AccAttributeVisitor::AddRoutineInfoToSymbol(
     }
     symbol.get<SubprogramDetails>().add_openACCRoutineInfo(info);
   } else {
-    llvm::errs() << "Couldnot add routine info to symbol: " << symbol.name() << "\n";
+    llvm::errs() << "Couldnot add routine info to symbol: " << symbol.name()
+                 << "\n";
   }
 }
 

>From c26093683edb7c0270809d2afb717450f92df6ab Mon Sep 17 00:00:00 2001
From: Andre Kuhlenschmidt <akuhlenschmi at nvidia.com>
Date: Fri, 25 Apr 2025 14:47:39 -0700
Subject: [PATCH 06/11] tidy up

---
 flang/include/flang/Lower/OpenACC.h | 1 -
 flang/lib/Lower/CallInterface.cpp   | 1 -
 2 files changed, 2 deletions(-)

diff --git a/flang/include/flang/Lower/OpenACC.h b/flang/include/flang/Lower/OpenACC.h
index 35a33e751b52b..9e71ad0a15c89 100644
--- a/flang/include/flang/Lower/OpenACC.h
+++ b/flang/include/flang/Lower/OpenACC.h
@@ -46,7 +46,6 @@ struct AccClauseList;
 struct OpenACCConstruct;
 struct OpenACCDeclarativeConstruct;
 struct OpenACCRoutineConstruct;
-struct ProcedureDesignator;
 } // namespace parser
 
 namespace semantics {
diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp
index 611eacfe178e5..602b5c7bfa6c6 100644
--- a/flang/lib/Lower/CallInterface.cpp
+++ b/flang/lib/Lower/CallInterface.cpp
@@ -1701,7 +1701,6 @@ class SignatureBuilder
       fir::emitFatalError(converter.getCurrentLocation(),
                           "SignatureBuilder should only be used once");
     declare();
-
     interfaceDetermined = true;
     return getFuncOp();
   }

>From 1b825b55c808ac92cd2866d855611cd585eb28db Mon Sep 17 00:00:00 2001
From: Andre Kuhlenschmidt <akuhlenschmi at nvidia.com>
Date: Fri, 25 Apr 2025 17:00:27 -0700
Subject: [PATCH 07/11] cleaning up unused code

---
 flang/include/flang/Lower/AbstractConverter.h |   3 -
 flang/include/flang/Lower/OpenACC.h           |  18 +-
 flang/lib/Lower/Bridge.cpp                    |  43 +++--
 flang/lib/Lower/OpenACC.cpp                   | 166 +-----------------
 flang/lib/Semantics/resolve-directives.cpp    |  12 +-
 5 files changed, 32 insertions(+), 210 deletions(-)

diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h
index 59419e829718f..2fa0da94b0396 100644
--- a/flang/include/flang/Lower/AbstractConverter.h
+++ b/flang/include/flang/Lower/AbstractConverter.h
@@ -358,9 +358,6 @@ class AbstractConverter {
   /// functions in order to be in sync).
   virtual mlir::SymbolTable *getMLIRSymbolTable() = 0;
 
-  virtual Fortran::lower::AccRoutineInfoMappingList &
-  getAccDelayedRoutines() = 0;
-
 private:
   /// Options controlling lowering behavior.
   const Fortran::lower::LoweringOptions &loweringOptions;
diff --git a/flang/include/flang/Lower/OpenACC.h b/flang/include/flang/Lower/OpenACC.h
index 9e71ad0a15c89..d2cd7712fb2c7 100644
--- a/flang/include/flang/Lower/OpenACC.h
+++ b/flang/include/flang/Lower/OpenACC.h
@@ -63,9 +63,6 @@ namespace pft {
 struct Evaluation;
 } // namespace pft
 
-using AccRoutineInfoMappingList =
-    llvm::SmallVector<std::pair<std::string, mlir::SymbolRefAttr>>;
-
 static constexpr llvm::StringRef declarePostAllocSuffix =
     "_acc_declare_update_desc_post_alloc";
 static constexpr llvm::StringRef declarePreDeallocSuffix =
@@ -82,22 +79,13 @@ mlir::Value genOpenACCConstruct(AbstractConverter &,
                                 Fortran::semantics::SemanticsContext &,
                                 pft::Evaluation &,
                                 const parser::OpenACCConstruct &);
-void genOpenACCDeclarativeConstruct(AbstractConverter &,
-                                    Fortran::semantics::SemanticsContext &,
-                                    StatementContext &,
-                                    const parser::OpenACCDeclarativeConstruct &,
-                                    AccRoutineInfoMappingList &);
-void genOpenACCRoutineConstruct(AbstractConverter &,
-                                Fortran::semantics::SemanticsContext &,
-                                mlir::ModuleOp,
-                                const parser::OpenACCRoutineConstruct &);
+void genOpenACCDeclarativeConstruct(
+    AbstractConverter &, Fortran::semantics::SemanticsContext &,
+    StatementContext &, const parser::OpenACCDeclarativeConstruct &);
 void genOpenACCRoutineConstruct(
     AbstractConverter &, mlir::ModuleOp, mlir::func::FuncOp,
     const std::vector<Fortran::semantics::OpenACCRoutineInfo> &);
 
-void finalizeOpenACCRoutineAttachment(mlir::ModuleOp,
-                                      AccRoutineInfoMappingList &);
-
 /// Get a acc.private.recipe op for the given type or create it if it does not
 /// exist yet.
 mlir::acc::PrivateRecipeOp createOrGetPrivateRecipe(mlir::OpBuilder &,
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 5e7b783323bfd..1615493003898 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -458,15 +458,25 @@ class FirConverter : public Fortran::lower::AbstractConverter {
                                   Fortran::common::LanguageFeature::CUDA));
       });
 
-    finalizeOpenACCLowering();
     finalizeOpenMPLowering(globalOmpRequiresSymbol);
   }
 
   /// Declare a function.
   void declareFunction(Fortran::lower::pft::FunctionLikeUnit &funit) {
+    // Since this is a recursive function, we only need to create a new builder
+    // for each top-level declaration. It would be simpler to have a single
+    // builder for the entire translation unit, but that requires a lot of
+    // changes to the code.
+    // FIXME: Once createGlobalOutsideOfFunctionLowering is fixed, we can
+    // remove this code and share the module builder.
+    bool newBuilder = false;
+    if (!builder) {
+      newBuilder = true;
+      builder = new fir::FirOpBuilder(bridge.getModule(), bridge.getKindMap(),
+                                      &mlirSymbolTable);
+    }
+    CHECK(builder && "FirOpBuilder did not instantiate");
     setCurrentPosition(funit.getStartingSourceLoc());
-    builder = new fir::FirOpBuilder(bridge.getModule(), bridge.getKindMap(),
-                                    &mlirSymbolTable);
     for (int entryIndex = 0, last = funit.entryPointList.size();
          entryIndex < last; ++entryIndex) {
       funit.setActiveEntry(entryIndex);
@@ -493,7 +503,11 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     for (Fortran::lower::pft::ContainedUnit &unit : funit.containedUnitList)
       if (auto *f = std::get_if<Fortran::lower::pft::FunctionLikeUnit>(&unit))
         declareFunction(*f);
-    builder = nullptr;
+
+    if (newBuilder) {
+      delete builder;
+      builder = nullptr;
+    }
   }
 
   /// Get the scope that is defining or using \p sym. The returned scope is not
@@ -3017,8 +3031,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
 
   void genFIR(const Fortran::parser::OpenACCDeclarativeConstruct &accDecl) {
     genOpenACCDeclarativeConstruct(*this, bridge.getSemanticsContext(),
-                                   bridge.openAccCtx(), accDecl,
-                                   accRoutineInfos);
+                                   bridge.openAccCtx(), accDecl);
     for (Fortran::lower::pft::Evaluation &e : getEval().getNestedEvaluations())
       genFIR(e);
   }
@@ -4286,11 +4299,6 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     return Fortran::lower::createMutableBox(loc, *this, expr, localSymbols);
   }
 
-  Fortran::lower::AccRoutineInfoMappingList &
-  getAccDelayedRoutines() override final {
-    return accRoutineInfos;
-  }
-
   // Create the [newRank] array with the lower bounds to be passed to the
   // runtime as a descriptor.
   mlir::Value createLboundArray(llvm::ArrayRef<mlir::Value> lbounds,
@@ -5889,7 +5897,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
   /// Helper to generate GlobalOps when the builder is not positioned in any
   /// region block. This is required because the FirOpBuilder assumes it is
   /// always positioned inside a region block when creating globals, the easiest
-  /// way comply is to create a dummy function and to throw it afterwards.
+  /// way comply is to create a dummy function and to throw it away afterwards.
   void createGlobalOutsideOfFunctionLowering(
       const std::function<void()> &createGlobals) {
     // FIXME: get rid of the bogus function context and instantiate the
@@ -5902,6 +5910,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
         mlir::FunctionType::get(context, std::nullopt, std::nullopt),
         symbolTable);
     func.addEntryBlock();
+    CHECK(!builder && "Expected builder to be uninitialized");
     builder = new fir::FirOpBuilder(func, bridge.getKindMap(), symbolTable);
     assert(builder && "FirOpBuilder did not instantiate");
     builder->setFastMathFlags(bridge.getLoweringOptions().getMathOptions());
@@ -6331,13 +6340,6 @@ class FirConverter : public Fortran::lower::AbstractConverter {
         expr.u);
   }
 
-  /// Performing OpenACC lowering action that were deferred to the end of
-  /// lowering.
-  void finalizeOpenACCLowering() {
-    Fortran::lower::finalizeOpenACCRoutineAttachment(getModuleOp(),
-                                                     accRoutineInfos);
-  }
-
   /// Performing OpenMP lowering actions that were deferred to the end of
   /// lowering.
   void finalizeOpenMPLowering(
@@ -6429,9 +6431,6 @@ class FirConverter : public Fortran::lower::AbstractConverter {
   /// A counter for uniquing names in `literalNamesMap`.
   std::uint64_t uniqueLitId = 0;
 
-  /// Deferred OpenACC routine attachment.
-  Fortran::lower::AccRoutineInfoMappingList accRoutineInfos;
-
   /// Whether an OpenMP target region or declare target function/subroutine
   /// intended for device offloading has been detected
   bool ompDeviceCodeFound = false;
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 891dc998bc596..1a031dce7a487 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -4163,9 +4163,6 @@ void createOpenACCRoutineConstruct(
     llvm::SmallVector<mlir::Attribute> &workerDeviceTypes,
     llvm::SmallVector<mlir::Attribute> &vectorDeviceTypes) {
 
-  std::stringstream routineOpName;
-  routineOpName << accRoutinePrefix.str() << routineCounter++;
-
   for (auto routineOp : mod.getOps<mlir::acc::RoutineOp>()) {
     if (routineOp.getFuncName().str().compare(funcName) == 0) {
       // If the routine is already specified with the same clauses, just skip
@@ -4179,6 +4176,8 @@ void createOpenACCRoutineConstruct(
       mlir::emitError(loc, "Routine already specified with different clauses");
     }
   }
+  std::stringstream routineOpName;
+  routineOpName << accRoutinePrefix.str() << routineCounter++;
   std::string routineOpStr = routineOpName.str();
   mlir::OpBuilder modBuilder(mod.getBodyRegion());
   fir::FirOpBuilder &builder = converter.getFirOpBuilder();
@@ -4192,16 +4191,7 @@ void createOpenACCRoutineConstruct(
       getArrayAttrOrNull(builder, gangDimValues),
       getArrayAttrOrNull(builder, gangDimDeviceTypes));
 
-  auto symbolRefAttr = builder.getSymbolRefAttr(routineOpStr);
-  if (funcOp) {
-
-    attachRoutineInfo(funcOp, symbolRefAttr);
-  } else {
-    // FuncOp is not lowered yet. Keep the information so the routine info
-    // can be attached later to the funcOp.
-    converter.getAccDelayedRoutines().push_back(
-        std::make_pair(funcName, symbolRefAttr));
-  }
+  attachRoutineInfo(funcOp, builder.getSymbolRefAttr(routineOpStr));
 }
 
 static void interpretRoutineDeviceInfo(
@@ -4299,145 +4289,6 @@ void Fortran::lower::genOpenACCRoutineConstruct(
       seqDeviceTypes, workerDeviceTypes, vectorDeviceTypes);
 }
 
-void Fortran::lower::genOpenACCRoutineConstruct(
-    Fortran::lower::AbstractConverter &converter,
-    Fortran::semantics::SemanticsContext &semanticsContext, mlir::ModuleOp mod,
-    const Fortran::parser::OpenACCRoutineConstruct &routineConstruct) {
-  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
-  mlir::Location loc = converter.genLocation(routineConstruct.source);
-  std::optional<Fortran::parser::Name> name =
-      std::get<std::optional<Fortran::parser::Name>>(routineConstruct.t);
-  const auto &clauses =
-      std::get<Fortran::parser::AccClauseList>(routineConstruct.t);
-  mlir::func::FuncOp funcOp;
-  std::string funcName;
-  if (name) {
-    funcName = converter.mangleName(*name->symbol);
-    funcOp =
-        builder.getNamedFunction(mod, builder.getMLIRSymbolTable(), funcName);
-  } else {
-    Fortran::semantics::Scope &scope =
-        semanticsContext.FindScope(routineConstruct.source);
-    const Fortran::semantics::Scope &progUnit{GetProgramUnitContaining(scope)};
-    const auto *subpDetails{
-        progUnit.symbol()
-            ? progUnit.symbol()
-                  ->detailsIf<Fortran::semantics::SubprogramDetails>()
-            : nullptr};
-    if (subpDetails && subpDetails->isInterface()) {
-      funcName = converter.mangleName(*progUnit.symbol());
-      funcOp =
-          builder.getNamedFunction(mod, builder.getMLIRSymbolTable(), funcName);
-    } else {
-      funcOp = builder.getFunction();
-      funcName = funcOp.getName();
-    }
-  }
-  // TODO: Refactor this to use the OpenACCRoutineInfo
-  bool hasNohost = false;
-
-  llvm::SmallVector<mlir::Attribute> seqDeviceTypes, vectorDeviceTypes,
-      workerDeviceTypes, bindNameDeviceTypes, bindNames, gangDeviceTypes,
-      gangDimDeviceTypes, gangDimValues;
-
-  // device_type attribute is set to `none` until a device_type clause is
-  // encountered.
-  llvm::SmallVector<mlir::Attribute> crtDeviceTypes;
-  crtDeviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
-      builder.getContext(), mlir::acc::DeviceType::None));
-
-  for (const Fortran::parser::AccClause &clause : clauses.v) {
-    if (std::get_if<Fortran::parser::AccClause::Seq>(&clause.u)) {
-      for (auto crtDeviceTypeAttr : crtDeviceTypes)
-        seqDeviceTypes.push_back(crtDeviceTypeAttr);
-    } else if (const auto *gangClause =
-                   std::get_if<Fortran::parser::AccClause::Gang>(&clause.u)) {
-      if (gangClause->v) {
-        const Fortran::parser::AccGangArgList &x = *gangClause->v;
-        for (const Fortran::parser::AccGangArg &gangArg : x.v) {
-          if (const auto *dim =
-                  std::get_if<Fortran::parser::AccGangArg::Dim>(&gangArg.u)) {
-            const std::optional<int64_t> dimValue = Fortran::evaluate::ToInt64(
-                *Fortran::semantics::GetExpr(dim->v));
-            if (!dimValue)
-              mlir::emitError(loc,
-                              "dim value must be a constant positive integer");
-            mlir::Attribute gangDimAttr =
-                builder.getIntegerAttr(builder.getI64Type(), *dimValue);
-            for (auto crtDeviceTypeAttr : crtDeviceTypes) {
-              gangDimValues.push_back(gangDimAttr);
-              gangDimDeviceTypes.push_back(crtDeviceTypeAttr);
-            }
-          }
-        }
-      } else {
-        for (auto crtDeviceTypeAttr : crtDeviceTypes)
-          gangDeviceTypes.push_back(crtDeviceTypeAttr);
-      }
-    } else if (std::get_if<Fortran::parser::AccClause::Vector>(&clause.u)) {
-      for (auto crtDeviceTypeAttr : crtDeviceTypes)
-        vectorDeviceTypes.push_back(crtDeviceTypeAttr);
-    } else if (std::get_if<Fortran::parser::AccClause::Worker>(&clause.u)) {
-      for (auto crtDeviceTypeAttr : crtDeviceTypes)
-        workerDeviceTypes.push_back(crtDeviceTypeAttr);
-    } else if (std::get_if<Fortran::parser::AccClause::Nohost>(&clause.u)) {
-      hasNohost = true;
-    } else if (const auto *bindClause =
-                   std::get_if<Fortran::parser::AccClause::Bind>(&clause.u)) {
-      if (const auto *name =
-              std::get_if<Fortran::parser::Name>(&bindClause->v.u)) {
-        mlir::Attribute bindNameAttr =
-            builder.getStringAttr(converter.mangleName(*name->symbol));
-        for (auto crtDeviceTypeAttr : crtDeviceTypes) {
-          bindNames.push_back(bindNameAttr);
-          bindNameDeviceTypes.push_back(crtDeviceTypeAttr);
-        }
-      } else if (const auto charExpr =
-                     std::get_if<Fortran::parser::ScalarDefaultCharExpr>(
-                         &bindClause->v.u)) {
-        const std::optional<std::string> name =
-            Fortran::semantics::GetConstExpr<std::string>(semanticsContext,
-                                                          *charExpr);
-        if (!name)
-          mlir::emitError(loc, "Could not retrieve the bind name");
-
-        mlir::Attribute bindNameAttr = builder.getStringAttr(*name);
-        for (auto crtDeviceTypeAttr : crtDeviceTypes) {
-          bindNames.push_back(bindNameAttr);
-          bindNameDeviceTypes.push_back(crtDeviceTypeAttr);
-        }
-      }
-    } else if (const auto *deviceTypeClause =
-                   std::get_if<Fortran::parser::AccClause::DeviceType>(
-                       &clause.u)) {
-      crtDeviceTypes.clear();
-      gatherDeviceTypeAttrs(builder, deviceTypeClause, crtDeviceTypes);
-    }
-  }
-
-  createOpenACCRoutineConstruct(
-      converter, loc, mod, funcOp, funcName, hasNohost, bindNames,
-      bindNameDeviceTypes, gangDeviceTypes, gangDimValues, gangDimDeviceTypes,
-      seqDeviceTypes, workerDeviceTypes, vectorDeviceTypes);
-}
-
-void Fortran::lower::finalizeOpenACCRoutineAttachment(
-    mlir::ModuleOp mod,
-    Fortran::lower::AccRoutineInfoMappingList &accRoutineInfos) {
-  for (auto &mapping : accRoutineInfos) {
-    mlir::func::FuncOp funcOp =
-        mod.lookupSymbol<mlir::func::FuncOp>(mapping.first);
-    if (!funcOp)
-      mlir::emitWarning(mod.getLoc(),
-                        llvm::Twine("function '") + llvm::Twine(mapping.first) +
-                            llvm::Twine("' in acc routine directive is not "
-                                        "found in this translation unit."));
-    else
-      attachRoutineInfo(funcOp, mapping.second);
-  }
-  accRoutineInfos.clear();
-}
-
 static void
 genACC(Fortran::lower::AbstractConverter &converter,
        Fortran::lower::pft::Evaluation &eval,
@@ -4551,8 +4402,7 @@ void Fortran::lower::genOpenACCDeclarativeConstruct(
     Fortran::lower::AbstractConverter &converter,
     Fortran::semantics::SemanticsContext &semanticsContext,
     Fortran::lower::StatementContext &openAccCtx,
-    const Fortran::parser::OpenACCDeclarativeConstruct &accDeclConstruct,
-    Fortran::lower::AccRoutineInfoMappingList &accRoutineInfos) {
+    const Fortran::parser::OpenACCDeclarativeConstruct &accDeclConstruct) {
 
   Fortran::common::visit(
       common::visitors{
@@ -4561,13 +4411,7 @@ void Fortran::lower::genOpenACCDeclarativeConstruct(
             genACC(converter, semanticsContext, openAccCtx,
                    standaloneDeclarativeConstruct);
           },
-          [&](const Fortran::parser::OpenACCRoutineConstruct
-                  &routineConstruct) {
-            fir::FirOpBuilder &builder = converter.getFirOpBuilder();
-            mlir::ModuleOp mod = builder.getModule();
-            Fortran::lower::genOpenACCRoutineConstruct(
-                converter, semanticsContext, mod, routineConstruct);
-          },
+          [&](const Fortran::parser::OpenACCRoutineConstruct &x) {},
       },
       accDeclConstruct.u);
 }
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index c2df7cddc0025..d74953df1e630 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -1041,9 +1041,8 @@ void AccAttributeVisitor::AddRoutineInfoToSymbol(
       if (const auto *dTypeClause =
               std::get_if<Fortran::parser::AccClause::DeviceType>(&clause.u)) {
         currentDevices.clear();
-        for (const auto &deviceTypeExpr : dTypeClause->v.v) {
+        for (const auto &deviceTypeExpr : dTypeClause->v.v)
           currentDevices.push_back(&info.add_deviceTypeInfo(deviceTypeExpr.v));
-        }
       } else if (std::get_if<Fortran::parser::AccClause::Nohost>(&clause.u)) {
         info.set_isNohost();
       } else if (std::get_if<Fortran::parser::AccClause::Seq>(&clause.u)) {
@@ -1080,9 +1079,8 @@ void AccAttributeVisitor::AddRoutineInfoToSymbol(
                 std::get_if<Fortran::parser::Name>(&bindClause->v.u)) {
           if (Symbol *sym = ResolveFctName(*name)) {
             Symbol &ultimate{sym->GetUltimate()};
-            for (auto &device : currentDevices) {
+            for (auto &device : currentDevices)
               device->set_bindName(SymbolRef(ultimate));
-            }
           } else {
             context_.Say((*name).source,
                 "No function or subroutine declared for '%s'"_err_en_US,
@@ -1095,16 +1093,12 @@ void AccAttributeVisitor::AddRoutineInfoToSymbol(
               Fortran::parser::Unwrap<Fortran::parser::CharLiteralConstant>(
                   *charExpr);
           std::string str{std::get<std::string>(charConst->t)};
-          for (auto &device : currentDevices) {
+          for (auto &device : currentDevices)
             device->set_bindName(std::string(str));
-          }
         }
       }
     }
     symbol.get<SubprogramDetails>().add_openACCRoutineInfo(info);
-  } else {
-    llvm::errs() << "Couldnot add routine info to symbol: " << symbol.name()
-                 << "\n";
   }
 }
 

>From 158481e5eaf63c1b2b4c172b9c143ca4c10722f5 Mon Sep 17 00:00:00 2001
From: Andre Kuhlenschmidt <akuhlenschmi at nvidia.com>
Date: Fri, 25 Apr 2025 17:15:26 -0700
Subject: [PATCH 08/11] a little more tidying up

---
 flang/include/flang/Lower/AbstractConverter.h | 1 -
 flang/include/flang/Lower/OpenACC.h           | 3 ---
 flang/lib/Lower/Bridge.cpp                    | 3 ++-
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h
index 2fa0da94b0396..1d1323642bf9c 100644
--- a/flang/include/flang/Lower/AbstractConverter.h
+++ b/flang/include/flang/Lower/AbstractConverter.h
@@ -14,7 +14,6 @@
 #define FORTRAN_LOWER_ABSTRACTCONVERTER_H
 
 #include "flang/Lower/LoweringOptions.h"
-#include "flang/Lower/OpenACC.h"
 #include "flang/Lower/PFTDefs.h"
 #include "flang/Optimizer/Builder/BoxValue.h"
 #include "flang/Optimizer/Dialect/FIRAttr.h"
diff --git a/flang/include/flang/Lower/OpenACC.h b/flang/include/flang/Lower/OpenACC.h
index d2cd7712fb2c7..4034953976427 100644
--- a/flang/include/flang/Lower/OpenACC.h
+++ b/flang/include/flang/Lower/OpenACC.h
@@ -72,9 +72,6 @@ static constexpr llvm::StringRef declarePostDeallocSuffix =
 
 static constexpr llvm::StringRef privatizationRecipePrefix = "privatization";
 
-bool needsOpenACCRoutineConstruct(
-    const Fortran::evaluate::ProcedureDesignator *);
-
 mlir::Value genOpenACCConstruct(AbstractConverter &,
                                 Fortran::semantics::SemanticsContext &,
                                 pft::Evaluation &,
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 1615493003898..e50c91654f7bb 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -5897,7 +5897,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
   /// Helper to generate GlobalOps when the builder is not positioned in any
   /// region block. This is required because the FirOpBuilder assumes it is
   /// always positioned inside a region block when creating globals, the easiest
-  /// way comply is to create a dummy function and to throw it away afterwards.
+  /// way to comply is to create a dummy function and to throw it away 
+  /// afterwards.
   void createGlobalOutsideOfFunctionLowering(
       const std::function<void()> &createGlobals) {
     // FIXME: get rid of the bogus function context and instantiate the

>From 8f6ae035147336c4ed04b5b25487f72ebc52c757 Mon Sep 17 00:00:00 2001
From: Andre Kuhlenschmidt <akuhlenschmi at nvidia.com>
Date: Thu, 1 May 2025 08:12:35 -0700
Subject: [PATCH 09/11] more consistent use of builders

---
 flang/lib/Lower/Bridge.cpp        | 40 ++++++++++---------------------
 flang/lib/Lower/CallInterface.cpp |  1 -
 2 files changed, 13 insertions(+), 28 deletions(-)

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index e50c91654f7bb..fb20dfbaf477e 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -403,18 +403,21 @@ class FirConverter : public Fortran::lower::AbstractConverter {
               [&](Fortran::lower::pft::FunctionLikeUnit &f) {
                 if (f.isMainProgram())
                   hasMainProgram = true;
-                declareFunction(f);
+                createGlobalOutsideOfFunctionLowering(
+                    [&]() { declareFunction(f); });
                 if (!globalOmpRequiresSymbol)
                   globalOmpRequiresSymbol = f.getScope().symbol();
               },
               [&](Fortran::lower::pft::ModuleLikeUnit &m) {
                 lowerModuleDeclScope(m);
-                for (Fortran::lower::pft::ContainedUnit &unit :
-                     m.containedUnitList)
-                  if (auto *f =
-                          std::get_if<Fortran::lower::pft::FunctionLikeUnit>(
-                              &unit))
-                    declareFunction(*f);
+                createGlobalOutsideOfFunctionLowering([&]() {
+                  for (Fortran::lower::pft::ContainedUnit &unit :
+                       m.containedUnitList)
+                    if (auto *f =
+                            std::get_if<Fortran::lower::pft::FunctionLikeUnit>(
+                                &unit))
+                      declareFunction(*f);
+                });
               },
               [&](Fortran::lower::pft::BlockDataUnit &b) {
                 if (!globalOmpRequiresSymbol)
@@ -463,19 +466,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
 
   /// Declare a function.
   void declareFunction(Fortran::lower::pft::FunctionLikeUnit &funit) {
-    // Since this is a recursive function, we only need to create a new builder
-    // for each top-level declaration. It would be simpler to have a single
-    // builder for the entire translation unit, but that requires a lot of
-    // changes to the code.
-    // FIXME: Once createGlobalOutsideOfFunctionLowering is fixed, we can
-    // remove this code and share the module builder.
-    bool newBuilder = false;
-    if (!builder) {
-      newBuilder = true;
-      builder = new fir::FirOpBuilder(bridge.getModule(), bridge.getKindMap(),
-                                      &mlirSymbolTable);
-    }
-    CHECK(builder && "FirOpBuilder did not instantiate");
+    CHECK(builder && "declareFunction called with uninitialized builder");
     setCurrentPosition(funit.getStartingSourceLoc());
     for (int entryIndex = 0, last = funit.entryPointList.size();
          entryIndex < last; ++entryIndex) {
@@ -503,11 +494,6 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     for (Fortran::lower::pft::ContainedUnit &unit : funit.containedUnitList)
       if (auto *f = std::get_if<Fortran::lower::pft::FunctionLikeUnit>(&unit))
         declareFunction(*f);
-
-    if (newBuilder) {
-      delete builder;
-      builder = nullptr;
-    }
   }
 
   /// Get the scope that is defining or using \p sym. The returned scope is not
@@ -5624,9 +5610,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     LLVM_DEBUG(llvm::dbgs() << "\n[bridge - startNewFunction]";
                if (auto *sym = scope.symbol()) llvm::dbgs() << " " << *sym;
                llvm::dbgs() << "\n");
-    // I don't think setting the builder is necessary here, because callee
+    // Setting the builder is not necessary here, because callee
     // always looks up the FuncOp from the module. If there was a function that
-    // was not declared yet. This call to callee will cause an assertion
+    // was not declared yet, this call to callee will cause an assertion
     // failure.
     Fortran::lower::CalleeInterface callee(funit, *this);
     mlir::func::FuncOp func = callee.addEntryBlockAndMapArguments();
diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp
index 602b5c7bfa6c6..8affa1e1965e8 100644
--- a/flang/lib/Lower/CallInterface.cpp
+++ b/flang/lib/Lower/CallInterface.cpp
@@ -21,7 +21,6 @@
 #include "flang/Optimizer/Dialect/FIROpsSupport.h"
 #include "flang/Optimizer/Support/InternalNames.h"
 #include "flang/Optimizer/Support/Utils.h"
-#include "flang/Parser/parse-tree.h"
 #include "flang/Semantics/symbol.h"
 #include "flang/Semantics/tools.h"
 #include "flang/Support/Fortran.h"

>From a52655d0b90055ad6ff062fbf66be3172a95973b Mon Sep 17 00:00:00 2001
From: Andre Kuhlenschmidt <akuhlenschmi at nvidia.com>
Date: Thu, 1 May 2025 08:18:14 -0700
Subject: [PATCH 10/11] delete space

---
 flang/lib/Lower/Bridge.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index fb20dfbaf477e..a6ee24edd8381 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -5883,7 +5883,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
   /// Helper to generate GlobalOps when the builder is not positioned in any
   /// region block. This is required because the FirOpBuilder assumes it is
   /// always positioned inside a region block when creating globals, the easiest
-  /// way to comply is to create a dummy function and to throw it away 
+  /// way to comply is to create a dummy function and to throw it away
   /// afterwards.
   void createGlobalOutsideOfFunctionLowering(
       const std::function<void()> &createGlobals) {

>From b2a1da313284c9709eae6f757885df697843565c Mon Sep 17 00:00:00 2001
From: Andre Kuhlenschmidt <akuhlenschmi at nvidia.com>
Date: Fri, 2 May 2025 11:30:33 -0700
Subject: [PATCH 11/11] less builder creation

---
 flang/lib/Lower/Bridge.cpp | 109 ++++++++++++++++++-------------------
 1 file changed, 53 insertions(+), 56 deletions(-)

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index a6ee24edd8381..81127ab55a937 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -397,40 +397,39 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     //   they are available before lowering any function that may use them.
     bool hasMainProgram = false;
     const Fortran::semantics::Symbol *globalOmpRequiresSymbol = nullptr;
-    for (Fortran::lower::pft::Program::Units &u : pft.getUnits()) {
-      Fortran::common::visit(
-          Fortran::common::visitors{
-              [&](Fortran::lower::pft::FunctionLikeUnit &f) {
-                if (f.isMainProgram())
-                  hasMainProgram = true;
-                createGlobalOutsideOfFunctionLowering(
-                    [&]() { declareFunction(f); });
-                if (!globalOmpRequiresSymbol)
-                  globalOmpRequiresSymbol = f.getScope().symbol();
-              },
-              [&](Fortran::lower::pft::ModuleLikeUnit &m) {
-                lowerModuleDeclScope(m);
-                createGlobalOutsideOfFunctionLowering([&]() {
+    createBuilderOutsideOfFuncOpAndDo([&]() {
+      for (Fortran::lower::pft::Program::Units &u : pft.getUnits()) {
+        Fortran::common::visit(
+            Fortran::common::visitors{
+                [&](Fortran::lower::pft::FunctionLikeUnit &f) {
+                  if (f.isMainProgram())
+                    hasMainProgram = true;
+                  declareFunction(f);
+                  if (!globalOmpRequiresSymbol)
+                    globalOmpRequiresSymbol = f.getScope().symbol();
+                },
+                [&](Fortran::lower::pft::ModuleLikeUnit &m) {
+                  lowerModuleDeclScope(m);
                   for (Fortran::lower::pft::ContainedUnit &unit :
                        m.containedUnitList)
                     if (auto *f =
                             std::get_if<Fortran::lower::pft::FunctionLikeUnit>(
                                 &unit))
                       declareFunction(*f);
-                });
-              },
-              [&](Fortran::lower::pft::BlockDataUnit &b) {
-                if (!globalOmpRequiresSymbol)
-                  globalOmpRequiresSymbol = b.symTab.symbol();
-              },
-              [&](Fortran::lower::pft::CompilerDirectiveUnit &d) {},
-              [&](Fortran::lower::pft::OpenACCDirectiveUnit &d) {},
-          },
-          u);
-    }
+                },
+                [&](Fortran::lower::pft::BlockDataUnit &b) {
+                  if (!globalOmpRequiresSymbol)
+                    globalOmpRequiresSymbol = b.symTab.symbol();
+                },
+                [&](Fortran::lower::pft::CompilerDirectiveUnit &d) {},
+                [&](Fortran::lower::pft::OpenACCDirectiveUnit &d) {},
+            },
+            u);
+      }
+    });
 
     // Create definitions of intrinsic module constants.
-    createGlobalOutsideOfFunctionLowering(
+    createBuilderOutsideOfFuncOpAndDo(
         [&]() { createIntrinsicModuleDefinitions(pft); });
 
     // Primary translation pass.
@@ -449,12 +448,12 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     // Once all the code has been translated, create global runtime type info
     // data structures for the derived types that have been processed, as well
     // as fir.type_info operations for the dispatch tables.
-    createGlobalOutsideOfFunctionLowering(
+    createBuilderOutsideOfFuncOpAndDo(
         [&]() { typeInfoConverter.createTypeInfo(*this); });
 
     // Generate the `main` entry point if necessary
     if (hasMainProgram)
-      createGlobalOutsideOfFunctionLowering([&]() {
+      createBuilderOutsideOfFuncOpAndDo([&]() {
         fir::runtime::genMain(*builder, toLocation(),
                               bridge.getEnvironmentDefaults(),
                               getFoldingContext().languageFeatures().IsEnabled(
@@ -5885,7 +5884,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
   /// always positioned inside a region block when creating globals, the easiest
   /// way to comply is to create a dummy function and to throw it away
   /// afterwards.
-  void createGlobalOutsideOfFunctionLowering(
+  void createBuilderOutsideOfFuncOpAndDo(
       const std::function<void()> &createGlobals) {
     // FIXME: get rid of the bogus function context and instantiate the
     // globals directly into the module.
@@ -5913,7 +5912,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
 
   /// Instantiate the data from a BLOCK DATA unit.
   void lowerBlockData(Fortran::lower::pft::BlockDataUnit &bdunit) {
-    createGlobalOutsideOfFunctionLowering([&]() {
+    createBuilderOutsideOfFuncOpAndDo([&]() {
       Fortran::lower::AggregateStoreMap fakeMap;
       for (const auto &[_, sym] : bdunit.symTab) {
         if (sym->has<Fortran::semantics::ObjectEntityDetails>()) {
@@ -5927,7 +5926,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
   /// Create fir::Global for all the common blocks that appear in the program.
   void
   lowerCommonBlocks(const Fortran::semantics::CommonBlockList &commonBlocks) {
-    createGlobalOutsideOfFunctionLowering(
+    createBuilderOutsideOfFuncOpAndDo(
         [&]() { Fortran::lower::defineCommonBlocks(*this, commonBlocks); });
   }
 
@@ -5997,36 +5996,34 @@ class FirConverter : public Fortran::lower::AbstractConverter {
   /// declarative construct.
   void lowerModuleDeclScope(Fortran::lower::pft::ModuleLikeUnit &mod) {
     setCurrentPosition(mod.getStartingSourceLoc());
-    createGlobalOutsideOfFunctionLowering([&]() {
-      auto &scopeVariableListMap =
-          Fortran::lower::pft::getScopeVariableListMap(mod);
-      for (const auto &var : Fortran::lower::pft::getScopeVariableList(
-               mod.getScope(), scopeVariableListMap)) {
-
-        // Only define the variables owned by this module.
-        const Fortran::semantics::Scope *owningScope = var.getOwningScope();
-        if (owningScope && mod.getScope() != *owningScope)
-          continue;
+    auto &scopeVariableListMap =
+        Fortran::lower::pft::getScopeVariableListMap(mod);
+    for (const auto &var : Fortran::lower::pft::getScopeVariableList(
+             mod.getScope(), scopeVariableListMap)) {
 
-        // Very special case: The value of numeric_storage_size depends on
-        // compilation options and therefore its value is not yet known when
-        // building the builtins runtime. Instead, the parameter is folding a
-        // __numeric_storage_size() expression which is loaded into the user
-        // program. For the iso_fortran_env object file, omit the symbol as it
-        // is never used.
-        if (var.hasSymbol()) {
-          const Fortran::semantics::Symbol &sym = var.getSymbol();
-          const Fortran::semantics::Scope &owner = sym.owner();
-          if (sym.name() == "numeric_storage_size" && owner.IsModule() &&
-              DEREF(owner.symbol()).name() == "iso_fortran_env")
-            continue;
-        }
+      // Only define the variables owned by this module.
+      const Fortran::semantics::Scope *owningScope = var.getOwningScope();
+      if (owningScope && mod.getScope() != *owningScope)
+        continue;
 
-        Fortran::lower::defineModuleVariable(*this, var);
+      // Very special case: The value of numeric_storage_size depends on
+      // compilation options and therefore its value is not yet known when
+      // building the builtins runtime. Instead, the parameter is folding a
+      // __numeric_storage_size() expression which is loaded into the user
+      // program. For the iso_fortran_env object file, omit the symbol as it
+      // is never used.
+      if (var.hasSymbol()) {
+        const Fortran::semantics::Symbol &sym = var.getSymbol();
+        const Fortran::semantics::Scope &owner = sym.owner();
+        if (sym.name() == "numeric_storage_size" && owner.IsModule() &&
+            DEREF(owner.symbol()).name() == "iso_fortran_env")
+          continue;
       }
+
+      Fortran::lower::defineModuleVariable(*this, var);
+    }
       for (auto &eval : mod.evaluationList)
         genFIR(eval);
-    });
   }
 
   /// Lower functions contained in a module.



More information about the flang-commits mailing list