[Mlir-commits] [mlir] [mlir][acc] Add pass to insert acc declare globals into GPU module (PR #181383)
Razvan Lupusoru
llvmlistbot at llvm.org
Fri Feb 13 11:13:20 PST 2026
https://github.com/razvanlupusoru updated https://github.com/llvm/llvm-project/pull/181383
>From f15fdea2c8e372e2c62335e9c704caf7c898721d Mon Sep 17 00:00:00 2001
From: Susan Tan <zujunt at nvidia.com>
Date: Fri, 13 Feb 2026 08:29:19 -0800
Subject: [PATCH 1/2] [mlir][acc] Add pass to insert acc declare globals into
GPU module
Adds a new OpenACC pass that copies globals with the `acc.declare`
attribute into the GPU module so that device code (acc routine, compute
regions) can reference them.
---
.../mlir/Dialect/OpenACC/Transforms/Passes.td | 9 ++
.../ACCDeclareGPUModuleInsertion.cpp | 125 ++++++++++++++++++
.../Dialect/OpenACC/Transforms/CMakeLists.txt | 1 +
.../acc-declare-gpu-module-insertion.mlir | 14 ++
4 files changed, 149 insertions(+)
create mode 100644 mlir/lib/Dialect/OpenACC/Transforms/ACCDeclareGPUModuleInsertion.cpp
create mode 100644 mlir/test/Dialect/OpenACC/acc-declare-gpu-module-insertion.mlir
diff --git a/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td b/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td
index 37243ecf4e1ac..7adab00ffff5d 100644
--- a/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td
@@ -136,6 +136,15 @@ def ACCImplicitRoutine : Pass<"acc-implicit-routine", "mlir::ModuleOp"> {
];
}
+def ACCDeclareGPUModuleInsertion : Pass<"acc-declare-gpu-module-insertion", "mlir::ModuleOp"> {
+ let summary = "Copy globals with acc.declare into the GPU module";
+ let description = [{
+ Copies globals that have the `acc.declare` attribute into the GPU module so
+ that device code can reference them.
+ }];
+ let dependentDialects = ["mlir::acc::OpenACCDialect", "mlir::gpu::GPUDialect"];
+}
+
def ACCLegalizeSerial : Pass<"acc-legalize-serial", "mlir::func::FuncOp"> {
let summary = "Legalize OpenACC serial constructs";
let description = [{
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCDeclareGPUModuleInsertion.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCDeclareGPUModuleInsertion.cpp
new file mode 100644
index 0000000000000..35133fb83daa4
--- /dev/null
+++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCDeclareGPUModuleInsertion.cpp
@@ -0,0 +1,125 @@
+//===- ACCDeclareGPUModuleInsertion.cpp -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass copies globals marked with the `acc.declare` attribute into the
+// GPU module so that device code (e.g. acc routine, compute regions) can
+// reference them.
+//
+// Overview:
+// ---------
+// Globals that have the `acc.declare` attribute (from the OpenACC declare
+// directive or from the `ACCImplicitDeclare` pass) must be present in the
+// GPU module for device code to use them. This pass inserts copies of those
+// globals into the GPU module, creating the module if it does not yet exist.
+// The host copy of each global remains in the parent module.
+//
+// Example:
+// --------
+//
+// Before:
+// module {
+// memref.global @arr : memref<7xf32> = dense<0.0>
+// {acc.declare = #acc.declare<dataClause = acc_create>}
+// }
+//
+// After:
+// module attributes {gpu.container_module} {
+// memref.global @arr : memref<7xf32> = dense<0.0>
+// {acc.declare = #acc.declare<dataClause = acc_create>}
+// gpu.module @acc_gpu_module {
+// memref.global @arr : memref<7xf32> = dense<0.0>
+// {acc.declare = #acc.declare<dataClause = acc_create>}
+// }
+// }
+//
+// Requirements:
+// -------------
+// The pass uses the `acc::OpenACCSupport` analysis for:
+// - getOrCreateGPUModule: to obtain or create the GPU module.
+// - emitNYI: to report failure when GPU module creation is not supported.
+// If no custom implementation is registered, the default implementation is
+// used (see OpenACCSupport).
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/OpenACC/Analysis/OpenACCSupport.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/Dialect/OpenACC/Transforms/Passes.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/IR/SymbolTable.h"
+
+namespace mlir {
+namespace acc {
+#define GEN_PASS_DEF_ACCDECLAREGPUMODULEINSERTION
+#include "mlir/Dialect/OpenACC/Transforms/Passes.h.inc"
+} // namespace acc
+} // namespace mlir
+
+#define DEBUG_TYPE "acc-declare-gpu-module-insertion"
+
+using namespace mlir;
+
+namespace {
+
+static bool hasAccDeclareGlobals(ModuleOp mod) {
+ for (Operation &op : mod.getBody()->getOperations())
+ if (op.getAttr(acc::getDeclareAttrName()))
+ return true;
+ return false;
+}
+
+class ACCDeclareGPUModuleInsertion
+ : public acc::impl::ACCDeclareGPUModuleInsertionBase<
+ ACCDeclareGPUModuleInsertion> {
+public:
+ using acc::impl::ACCDeclareGPUModuleInsertionBase<
+ ACCDeclareGPUModuleInsertion>::ACCDeclareGPUModuleInsertionBase;
+
+ void copyGlobalsToGPUModule(gpu::GPUModuleOp gpuMod, ModuleOp mod) const {
+ SymbolTable gpuSymTable(gpuMod);
+
+ for (Operation &globalOp : mod.getBody()->getOperations()) {
+ if (!globalOp.getAttr(acc::getDeclareAttrName()))
+ continue;
+
+ StringAttr name =
+ globalOp.getAttrOfType<StringAttr>(SymbolTable::getSymbolAttrName());
+ if (!name)
+ continue;
+
+ if (gpuSymTable.lookup(name.getValue()))
+ continue;
+
+ gpuSymTable.insert(globalOp.clone());
+ }
+ }
+
+ void runOnOperation() override {
+ ModuleOp mod = getOperation();
+
+ // Check for any candidates first - do this to avoid creating the GPU module
+ // if there are no candidates.
+ if (!hasAccDeclareGlobals(mod))
+ return;
+
+ acc::OpenACCSupport &accSupport = getAnalysis<acc::OpenACCSupport>();
+ std::optional<gpu::GPUModuleOp> gpuMod =
+ accSupport.getOrCreateGPUModule(mod);
+ if (!gpuMod) {
+ accSupport.emitNYI(mod.getLoc(), "Failed to create GPU module");
+ return;
+ }
+
+ copyGlobalsToGPUModule(*gpuMod, mod);
+ }
+};
+
+} // namespace
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt b/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt
index 20cfcccb9a42d..a1ccdf829550e 100644
--- a/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt
@@ -1,4 +1,5 @@
add_mlir_dialect_library(MLIROpenACCTransforms
+ ACCDeclareGPUModuleInsertion.cpp
ACCIfClauseLowering.cpp
ACCImplicitData.cpp
ACCLoopTiling.cpp
diff --git a/mlir/test/Dialect/OpenACC/acc-declare-gpu-module-insertion.mlir b/mlir/test/Dialect/OpenACC/acc-declare-gpu-module-insertion.mlir
new file mode 100644
index 0000000000000..8d206673ac02d
--- /dev/null
+++ b/mlir/test/Dialect/OpenACC/acc-declare-gpu-module-insertion.mlir
@@ -0,0 +1,14 @@
+// RUN: mlir-opt %s -acc-declare-gpu-module-insertion | FileCheck %s
+
+// Test that globals with acc.declare are copied into the GPU module.
+// The host global stays in the module; a copy is inserted into the GPU module.
+
+// CHECK-LABEL: module
+// CHECK: memref.global @arr {{.*}} {acc.declare = #acc.declare<dataClause = acc_create>}
+// CHECK: gpu.module @acc_gpu_module {
+// CHECK: memref.global @arr {{.*}} {acc.declare = #acc.declare<dataClause = acc_create>}
+// CHECK: }
+
+module {
+ memref.global @arr : memref<7xf32> = dense<0.0> {acc.declare = #acc.declare<dataClause = acc_create>}
+}
>From 18b368cb56a458e4c06c70ef4dc8d18ed668621e Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Fri, 13 Feb 2026 11:13:07 -0800
Subject: [PATCH 2/2] More robust checking for global insertion including
equivalence
---
.../ACCDeclareGPUModuleInsertion.cpp | 32 +++++++++++++++----
1 file changed, 26 insertions(+), 6 deletions(-)
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCDeclareGPUModuleInsertion.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCDeclareGPUModuleInsertion.cpp
index 35133fb83daa4..f815245882630 100644
--- a/mlir/lib/Dialect/OpenACC/Transforms/ACCDeclareGPUModuleInsertion.cpp
+++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCDeclareGPUModuleInsertion.cpp
@@ -54,6 +54,7 @@
#include "mlir/Dialect/OpenACC/Transforms/Passes.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Operation.h"
+#include "mlir/IR/OperationSupport.h"
#include "mlir/IR/SymbolTable.h"
namespace mlir {
@@ -83,23 +84,41 @@ class ACCDeclareGPUModuleInsertion
using acc::impl::ACCDeclareGPUModuleInsertionBase<
ACCDeclareGPUModuleInsertion>::ACCDeclareGPUModuleInsertionBase;
- void copyGlobalsToGPUModule(gpu::GPUModuleOp gpuMod, ModuleOp mod) const {
+ LogicalResult copyGlobalsToGPUModule(gpu::GPUModuleOp gpuMod, ModuleOp mod,
+ acc::OpenACCSupport &accSupport) const {
SymbolTable gpuSymTable(gpuMod);
for (Operation &globalOp : mod.getBody()->getOperations()) {
if (!globalOp.getAttr(acc::getDeclareAttrName()))
continue;
- StringAttr name =
- globalOp.getAttrOfType<StringAttr>(SymbolTable::getSymbolAttrName());
- if (!name)
+ auto symOp = dyn_cast<SymbolOpInterface>(&globalOp);
+ if (!symOp)
continue;
- if (gpuSymTable.lookup(name.getValue()))
+ StringAttr name = symOp.getNameAttr();
+
+ if (Operation *existing = gpuSymTable.lookup(name.getValue())) {
+ // Reuse only when the existing GPU symbol is structurally equivalent to
+ // the global we would insert. Otherwise treat as a conflict (different
+ // op type or different definition).
+ if (existing->getName() != globalOp.getName() ||
+ !OperationEquivalence::isEquivalentTo(
+ existing, &globalOp,
+ OperationEquivalence::ignoreValueEquivalence,
+ /*markEquivalent=*/nullptr,
+ OperationEquivalence::IgnoreLocations)) {
+ accSupport.emitNYI(globalOp.getLoc(),
+ llvm::Twine("duplicate global symbol '") +
+ name.getValue() + "' in gpu module");
+ return failure();
+ }
continue;
+ }
gpuSymTable.insert(globalOp.clone());
}
+ return success();
}
void runOnOperation() override {
@@ -118,7 +137,8 @@ class ACCDeclareGPUModuleInsertion
return;
}
- copyGlobalsToGPUModule(*gpuMod, mod);
+ if (failed(copyGlobalsToGPUModule(*gpuMod, mod, accSupport)))
+ return;
}
};
More information about the Mlir-commits
mailing list