[Mlir-commits] [mlir] [mlir][acc] Add pass to insert acc declare globals into GPU module (PR #181383)

Razvan Lupusoru llvmlistbot at llvm.org
Fri Feb 13 11:13:20 PST 2026


https://github.com/razvanlupusoru updated https://github.com/llvm/llvm-project/pull/181383

>From f15fdea2c8e372e2c62335e9c704caf7c898721d Mon Sep 17 00:00:00 2001
From: Susan Tan <zujunt at nvidia.com>
Date: Fri, 13 Feb 2026 08:29:19 -0800
Subject: [PATCH 1/2] [mlir][acc] Add pass to insert acc declare globals into
 GPU module

Adds a new OpenACC pass that copies globals with the `acc.declare`
attribute into the GPU module so that device code (acc routine, compute
regions) can reference them.
---
 .../mlir/Dialect/OpenACC/Transforms/Passes.td |   9 ++
 .../ACCDeclareGPUModuleInsertion.cpp          | 125 ++++++++++++++++++
 .../Dialect/OpenACC/Transforms/CMakeLists.txt |   1 +
 .../acc-declare-gpu-module-insertion.mlir     |  14 ++
 4 files changed, 149 insertions(+)
 create mode 100644 mlir/lib/Dialect/OpenACC/Transforms/ACCDeclareGPUModuleInsertion.cpp
 create mode 100644 mlir/test/Dialect/OpenACC/acc-declare-gpu-module-insertion.mlir

diff --git a/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td b/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td
index 37243ecf4e1ac..7adab00ffff5d 100644
--- a/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td
@@ -136,6 +136,15 @@ def ACCImplicitRoutine : Pass<"acc-implicit-routine", "mlir::ModuleOp"> {
   ];
 }
 
+def ACCDeclareGPUModuleInsertion : Pass<"acc-declare-gpu-module-insertion", "mlir::ModuleOp"> {
+  let summary = "Copy globals with acc.declare into the GPU module";
+  let description = [{
+    Copies globals that have the `acc.declare` attribute into the GPU module so
+    that device code can reference them.
+  }];
+  let dependentDialects = ["mlir::acc::OpenACCDialect", "mlir::gpu::GPUDialect"];
+}
+
 def ACCLegalizeSerial : Pass<"acc-legalize-serial", "mlir::func::FuncOp"> {
   let summary = "Legalize OpenACC serial constructs";
   let description = [{
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCDeclareGPUModuleInsertion.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCDeclareGPUModuleInsertion.cpp
new file mode 100644
index 0000000000000..35133fb83daa4
--- /dev/null
+++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCDeclareGPUModuleInsertion.cpp
@@ -0,0 +1,125 @@
+//===- ACCDeclareGPUModuleInsertion.cpp
+//------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass copies globals marked with the `acc.declare` attribute into the
+// GPU module so that device code (e.g. acc routine, compute regions) can
+// reference them.
+//
+// Overview:
+// ---------
+// Globals that have the `acc.declare` attribute (from the OpenACC declare
+// directive or from the `ACCImplicitDeclare` pass) must be present in the
+// GPU module for device code to use them. This pass inserts copies of those
+// globals into the GPU module, creating the module if it does not yet exist.
+// The host copy of each global remains in the parent module.
+//
+// Example:
+// --------
+//
+// Before:
+//   module {
+//     memref.global @arr : memref<7xf32> = dense<0.0>
+//         {acc.declare = #acc.declare<dataClause = acc_create>}
+//   }
+//
+// After:
+//   module attributes {gpu.container_module} {
+//     memref.global @arr : memref<7xf32> = dense<0.0>
+//         {acc.declare = #acc.declare<dataClause = acc_create>}
+//     gpu.module @acc_gpu_module {
+//       memref.global @arr : memref<7xf32> = dense<0.0>
+//           {acc.declare = #acc.declare<dataClause = acc_create>}
+//     }
+//   }
+//
+// Requirements:
+// -------------
+// The pass uses the `acc::OpenACCSupport` analysis for:
+// - getOrCreateGPUModule: to obtain or create the GPU module.
+// - emitNYI: to report failure when GPU module creation is not supported.
+// If no custom implementation is registered, the default implementation is
+// used (see OpenACCSupport).
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/OpenACC/Analysis/OpenACCSupport.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/Dialect/OpenACC/Transforms/Passes.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/IR/SymbolTable.h"
+
+namespace mlir {
+namespace acc {
+#define GEN_PASS_DEF_ACCDECLAREGPUMODULEINSERTION
+#include "mlir/Dialect/OpenACC/Transforms/Passes.h.inc"
+} // namespace acc
+} // namespace mlir
+
+#define DEBUG_TYPE "acc-declare-gpu-module-insertion"
+
+using namespace mlir;
+
+namespace {
+
+/// Returns true if any top-level op in `mod` has the `acc.declare` attribute.
+static bool hasAccDeclareGlobals(ModuleOp mod) {
+  return llvm::any_of(*mod.getBody(), [](Operation &topLevelOp) {
+    return topLevelOp.hasAttr(acc::getDeclareAttrName());
+  });
+}
+
+class ACCDeclareGPUModuleInsertion
+    : public acc::impl::ACCDeclareGPUModuleInsertionBase<
+          ACCDeclareGPUModuleInsertion> {
+public:
+  using acc::impl::ACCDeclareGPUModuleInsertionBase<
+      ACCDeclareGPUModuleInsertion>::ACCDeclareGPUModuleInsertionBase;
+
+  void copyGlobalsToGPUModule(gpu::GPUModuleOp gpuMod, ModuleOp mod) const {
+    SymbolTable gpuSymTable(gpuMod);
+
+    for (Operation &globalOp : mod.getBody()->getOperations()) {
+      if (!globalOp.getAttr(acc::getDeclareAttrName()))
+        continue;
+
+      StringAttr name =
+          globalOp.getAttrOfType<StringAttr>(SymbolTable::getSymbolAttrName());
+      if (!name)
+        continue;
+
+      if (gpuSymTable.lookup(name.getValue()))
+        continue;
+
+      gpuSymTable.insert(globalOp.clone());
+    }
+  }
+
+  void runOnOperation() override {
+    ModuleOp mod = getOperation();
+
+    // Check for any candidates first - do this to avoid creating the GPU module
+    // if there are no candidates.
+    if (!hasAccDeclareGlobals(mod))
+      return;
+
+    acc::OpenACCSupport &accSupport = getAnalysis<acc::OpenACCSupport>();
+    std::optional<gpu::GPUModuleOp> gpuMod =
+        accSupport.getOrCreateGPUModule(mod);
+    if (!gpuMod) {
+      accSupport.emitNYI(mod.getLoc(), "Failed to create GPU module");
+      return;
+    }
+
+    copyGlobalsToGPUModule(*gpuMod, mod);
+  }
+};
+
+} // namespace
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt b/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt
index 20cfcccb9a42d..a1ccdf829550e 100644
--- a/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_mlir_dialect_library(MLIROpenACCTransforms
+  ACCDeclareGPUModuleInsertion.cpp
   ACCIfClauseLowering.cpp
   ACCImplicitData.cpp
   ACCLoopTiling.cpp
diff --git a/mlir/test/Dialect/OpenACC/acc-declare-gpu-module-insertion.mlir b/mlir/test/Dialect/OpenACC/acc-declare-gpu-module-insertion.mlir
new file mode 100644
index 0000000000000..8d206673ac02d
--- /dev/null
+++ b/mlir/test/Dialect/OpenACC/acc-declare-gpu-module-insertion.mlir
@@ -0,0 +1,14 @@
+// RUN: mlir-opt %s -acc-declare-gpu-module-insertion | FileCheck %s
+
+// Test that globals with acc.declare are copied into the GPU module.
+// The host global stays in the module; a copy is inserted into the GPU module.
+
+// CHECK-LABEL: module
+// CHECK: memref.global @arr {{.*}} {acc.declare = #acc.declare<dataClause = acc_create>}
+// CHECK: gpu.module @acc_gpu_module {
+// CHECK: memref.global @arr {{.*}} {acc.declare = #acc.declare<dataClause = acc_create>}
+// CHECK: }
+
+module {
+  memref.global @arr : memref<7xf32> = dense<0.0> {acc.declare = #acc.declare<dataClause = acc_create>}
+}

>From 18b368cb56a458e4c06c70ef4dc8d18ed668621e Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Fri, 13 Feb 2026 11:13:07 -0800
Subject: [PATCH 2/2] More robust checking for global insertion including
 equivalence

---
 .../ACCDeclareGPUModuleInsertion.cpp          | 32 +++++++++++++++----
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCDeclareGPUModuleInsertion.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCDeclareGPUModuleInsertion.cpp
index 35133fb83daa4..f815245882630 100644
--- a/mlir/lib/Dialect/OpenACC/Transforms/ACCDeclareGPUModuleInsertion.cpp
+++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCDeclareGPUModuleInsertion.cpp
@@ -54,6 +54,7 @@
 #include "mlir/Dialect/OpenACC/Transforms/Passes.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Operation.h"
+#include "mlir/IR/OperationSupport.h"
 #include "mlir/IR/SymbolTable.h"
 
 namespace mlir {
@@ -83,23 +84,41 @@ class ACCDeclareGPUModuleInsertion
   using acc::impl::ACCDeclareGPUModuleInsertionBase<
       ACCDeclareGPUModuleInsertion>::ACCDeclareGPUModuleInsertionBase;
 
-  void copyGlobalsToGPUModule(gpu::GPUModuleOp gpuMod, ModuleOp mod) const {
+  LogicalResult copyGlobalsToGPUModule(gpu::GPUModuleOp gpuMod, ModuleOp mod,
+                                       acc::OpenACCSupport &accSupport) const {
     SymbolTable gpuSymTable(gpuMod);
 
     for (Operation &globalOp : mod.getBody()->getOperations()) {
       if (!globalOp.getAttr(acc::getDeclareAttrName()))
         continue;
 
-      StringAttr name =
-          globalOp.getAttrOfType<StringAttr>(SymbolTable::getSymbolAttrName());
-      if (!name)
+      auto symOp = dyn_cast<SymbolOpInterface>(&globalOp);
+      if (!symOp)
         continue;
 
-      if (gpuSymTable.lookup(name.getValue()))
+      StringAttr name = symOp.getNameAttr();
+
+      if (Operation *existing = gpuSymTable.lookup(name.getValue())) {
+        // Reuse only when the existing GPU symbol is structurally equivalent to
+        // the global we would insert. Otherwise treat as a conflict (different
+        // op type or different definition).
+        if (existing->getName() != globalOp.getName() ||
+            !OperationEquivalence::isEquivalentTo(
+                existing, &globalOp,
+                OperationEquivalence::ignoreValueEquivalence,
+                /*markEquivalent=*/nullptr,
+                OperationEquivalence::IgnoreLocations)) {
+          accSupport.emitNYI(globalOp.getLoc(),
+                             llvm::Twine("duplicate global symbol '") +
+                                 name.getValue() + "' in gpu module");
+          return failure();
+        }
         continue;
+      }
 
       gpuSymTable.insert(globalOp.clone());
     }
+    return success();
   }
 
   void runOnOperation() override {
@@ -118,7 +137,8 @@ class ACCDeclareGPUModuleInsertion
       return;
     }
 
-    copyGlobalsToGPUModule(*gpuMod, mod);
+    if (failed(copyGlobalsToGPUModule(*gpuMod, mod, accSupport)))
+      return signalPassFailure(); // Conflict was reported via emitNYI.
   }
 };
 



More information about the Mlir-commits mailing list