[Mlir-commits] [mlir] [mlir][acc] Introduce ACCImplicitDeclare pass for globals handling (PR #169720)
Razvan Lupusoru
llvmlistbot at llvm.org
Wed Nov 26 13:07:24 PST 2025
https://github.com/razvanlupusoru updated https://github.com/llvm/llvm-project/pull/169720
>From 630c69bfcf2421919840bfebe0a3c39eb9236136 Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Wed, 26 Nov 2025 12:03:58 -0800
Subject: [PATCH 01/10] [mlir][acc] Introduce ACCImplicitDeclare pass for
globals handling
This commit introduces the ACCImplicitDeclare pass to the
OpenACC dialect, complementing ACCImplicitData by handling
global variables referenced in OpenACC compute regions and
routines.
Overview:
---------
The pass applies implicit `acc declare` actions to global
variables referenced in OpenACC regions. While the
OpenACC spec focuses on implicit data mapping (handled by
ACCImplicitData), implicit declare is advantageous and
required for specific cases:
1. Globals referenced in implicit `acc routine` - Since data
mapping only applies to compute regions, globals in
routines must use `acc declare`.
2. Compiler-generated globals - Type descriptors, runtime
names, and error reporting strings introduced during
compilation that wouldn't be visible for user-provided
`acc declare` directives.
3. Constant globals - Constants like filename strings or
initialization values benefit from being marked with
`acc declare` rather than being mapped repeatedly (e.g.,
1000 kernel launches shouldn't map the same constant
1000 times).
Implementation:
---------------
The pass performs this in two phases:
1. Hoisting: Non-constant globals in compute regions have
their address-of operations hoisted out of the region
when possible, allowing implicit data mapping instead
of declare marking.
2. Declaration: Remaining that must be device available
(constants, globals in routines, globals in recipe
operations) are marked with the acc.declare attribute.
The pass processes:
- OpenACC compute constructs (parallel, kernels, serial)
- Functions marked with acc routine
- Private, firstprivate, and reduction recipes (when used)
- Initialization regions of existing declared globals
Requirements:
-------------
The pass requires operations to implement:
- acc::AddressOfGlobalOpInterface (for address-of ops)
- acc::GlobalVariableOpInterface (for global definitions)
- acc::IndirectGlobalAccessOpInterface (for indirect access)
---
mlir/include/mlir/Dialect/OpenACC/OpenACC.h | 5 +
.../mlir/Dialect/OpenACC/Transforms/Passes.td | 28 ++
.../OpenACC/Transforms/ACCImplicitDeclare.cpp | 441 ++++++++++++++++++
.../Dialect/OpenACC/Transforms/CMakeLists.txt | 1 +
.../Dialect/OpenACC/acc-implicit-declare.mlir | 175 +++++++
5 files changed, 650 insertions(+)
create mode 100644 mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
create mode 100644 mlir/test/Dialect/OpenACC/acc-implicit-declare.mlir
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
index 8571a2d9d0939..252a78648dd74 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
@@ -180,6 +180,11 @@ static constexpr StringLiteral getRoutineInfoAttrName() {
return StringLiteral("acc.routine_info");
}
+/// Used to check whether the current operation is an `acc routine`
+inline bool isAccRoutineOp(mlir::Operation *op) {
+ return op->hasAttr(mlir::acc::getRoutineInfoAttrName());
+}
+
static constexpr StringLiteral getFromDefaultClauseAttrName() {
return StringLiteral("acc.from_default");
}
diff --git a/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td b/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td
index cad78df2fbb0b..713aaabee65f0 100644
--- a/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td
@@ -63,6 +63,34 @@ def ACCImplicitData : Pass<"acc-implicit-data", "mlir::ModuleOp"> {
];
}
+def ACCImplicitDeclare : Pass<"acc-implicit-declare", "mlir::ModuleOp"> {
+ let summary = "Applies implicit acc declare to globals referenced in compute and routine acc regions";
+ let description = [{
+ This pass applies implicit `acc declare` actions to global variables
+ referenced in OpenACC compute regions and routine functions.
+
+ The pass performs the following actions:
+
+ 1. Hoists address-of operations for non-constant globals out of OpenACC
+ regions when they can be implicitly mapped rather than declared.
+
+ 2. Collects global symbols referenced in:
+ - OpenACC compute constructs (parallel, kernels, serial)
+ - Functions marked with acc routine
+ - Initialization regions of existing acc declare globals
+ - Private/firstprivate/reduction recipe operations
+
+ 3. Marks collected globals with the acc.declare attribute using the
+ copyin data clause.
+
+ The pass avoids unnecessary declare marking by:
+ - Skipping function symbols (which use acc routine instead)
+ - Hoisting non-constant global references that can use implicit mapping
+ - Only processing symbols that are not already valid in device regions
+ }];
+ let dependentDialects = ["mlir::acc::OpenACCDialect"];
+}
+
def ACCImplicitRoutine : Pass<"acc-implicit-routine", "mlir::ModuleOp"> {
let summary = "Generate implicit acc routine for functions in acc regions";
let description = [{
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
new file mode 100644
index 0000000000000..6585b96762326
--- /dev/null
+++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
@@ -0,0 +1,441 @@
+//===- ACCImplicitDeclare.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass applies implicit `acc declare` actions to global variables
+// referenced in OpenACC compute regions and routine functions.
+//
+// Overview:
+// ---------
+// Global references in an acc regions (for globals not marked with `acc
+// declare` by the user) can be handled in one of two ways:
+// - Mapped through data clauses
+// - Implicitly marked as `acc declare` (this pass)
+//
+// Thus, the OpenACC specification focuses solely on implicit data mapping rules
+// whose implementation is captured in `ACCImplicitData` pass.
+//
+// However, it is both advantageous and required for certain cases to
+// use implicit `acc declare` instead:
+// - Any functions that are implicitly marked as `acc routine` through
+// `ACCImplicitRoutine` may reference globals. Since data mapping
+// is only possible for compute regions, such globals can only be
+// made available on device through `acc declare`.
+// - Compiler can generate and use globals for cases needed in IR
+// representation such as type descriptors or various names needed for
+// runtime calls and error reporting - such cases often are introduced
+// after a frontend semantic checking is done since it is related to
+// implementation detail. Thus, such compiler generated globals would
+// not have been visible for a user to mark with `acc declare`.
+// - Constant globals such as filename strings or data initialization values
+// are values that do not get mutated but are still needed for appropriate
+// runtime execution. If a kernel is launched 1000 times, it is not a
+// good idea to map such a global 1000 times. Therefore, such globals
+// benefit from being marked with `acc declare`.
+//
+// This pass automatically
+// marks global variables with the `acc.declare` attribute when they are
+// referenced in OpenACC compute constructs or routine functions and meet
+// the criteria noted above, ensuring
+// they are properly handled for device execution.
+//
+// The pass performs two main optimizations:
+//
+// 1. Hoisting: For non-constant globals referenced in compute regions, the
+// pass hoists the address-of operation out of the region when possible,
+// allowing them to be implicitly mapped through normal data clause
+// mechanisms rather than requiring declare marking.
+//
+// 2. Declaration: For globals that must be available on the device (constants,
+// globals in routines, globals in recipe operations), the pass adds the
+// `acc.declare` attribute with the copyin data clause.
+//
+// Requirements:
+// -------------
+// To use this pass in a pipeline, the following requirements must be met:
+//
+// 1. Operation Interface Implementation: Operations that compute addresses
+// of global variables must implement the `acc::AddressOfGlobalOpInterface`
+// and those that represent globals must implement the
+// `acc::GlobalOpInterface`. Additionally, any operations that indirectly
+// access globals must implement the `acc::IndirectGlobalAccessOpInterface`.
+//
+// 2. Analysis Registration (Optional): If custom behavior is needed for
+// determining if a symbol use is valid within GPU regions, the dialect
+// should pre-register the `acc::OpenACCSupport` analysis.
+//
+// Examples:
+// ---------
+//
+// Example 1: Non-constant global in compute region (hoisted)
+//
+// Before:
+// memref.global @g_scalar : memref<f32> = dense<0.0>
+// func.func @test() {
+// acc.serial {
+// %addr = memref.get_global @g_scalar : memref<f32>
+// %val = memref.load %addr[] : memref<f32>
+// acc.yield
+// }
+// }
+//
+// After:
+// memref.global @g_scalar : memref<f32> = dense<0.0>
+// func.func @test() {
+// %addr = memref.get_global @g_scalar : memref<f32>
+// acc.serial {
+// %val = memref.load %addr[] : memref<f32>
+// acc.yield
+// }
+// }
+//
+// Example 2: Constant global in compute region (declared)
+//
+// Before:
+// memref.global constant @g_const : memref<f32> = dense<1.0>
+// func.func @test() {
+// acc.serial {
+// %addr = memref.get_global @g_const : memref<f32>
+// %val = memref.load %addr[] : memref<f32>
+// acc.yield
+// }
+// }
+//
+// After:
+// memref.global constant @g_const : memref<f32> = dense<1.0>
+// {acc.declare = #acc.declare<dataClause = acc_copyin>}
+// func.func @test() {
+// acc.serial {
+// %addr = memref.get_global @g_const : memref<f32>
+// %val = memref.load %addr[] : memref<f32>
+// acc.yield
+// }
+// }
+//
+// Example 3: Global in acc routine (declared)
+//
+// Before:
+// memref.global @g_data : memref<f32> = dense<0.0>
+// acc.routine @routine_0 func(@device_func)
+// func.func @device_func() attributes {acc.routine_info = ...} {
+// %addr = memref.get_global @g_data : memref<f32>
+// %val = memref.load %addr[] : memref<f32>
+// }
+//
+// After:
+// memref.global @g_data : memref<f32> = dense<0.0>
+// {acc.declare = #acc.declare<dataClause = acc_copyin>}
+// acc.routine @routine_0 func(@device_func)
+// func.func @device_func() attributes {acc.routine_info = ...} {
+// %addr = memref.get_global @g_data : memref<f32>
+// %val = memref.load %addr[] : memref<f32>
+// }
+//
+// Example 4: Global in private recipe (declared if recipe is used)
+//
+// Before:
+// memref.global @g_init : memref<f32> = dense<0.0>
+// acc.private.recipe @priv_recipe : memref<f32> init {
+// ^bb0(%arg0: memref<f32>):
+// %alloc = memref.alloc() : memref<f32>
+// %global = memref.get_global @g_init : memref<f32>
+// %val = memref.load %global[] : memref<f32>
+// memref.store %val, %alloc[] : memref<f32>
+// acc.yield %alloc : memref<f32>
+// } destroy { ... }
+// func.func @test() {
+// %var = memref.alloc() : memref<f32>
+// %priv = acc.private varPtr(%var : memref<f32>)
+// recipe(@priv_recipe) -> memref<f32>
+// acc.parallel private(%priv : memref<f32>) { ... }
+// }
+//
+// After:
+// memref.global @g_init : memref<f32> = dense<0.0>
+// {acc.declare = #acc.declare<dataClause = acc_copyin>}
+// acc.private.recipe @priv_recipe : memref<f32> init {
+// ^bb0(%arg0: memref<f32>):
+// %alloc = memref.alloc() : memref<f32>
+// %global = memref.get_global @g_init : memref<f32>
+// %val = memref.load %global[] : memref<f32>
+// memref.store %val, %alloc[] : memref<f32>
+// acc.yield %alloc : memref<f32>
+// } destroy { ... }
+// func.func @test() {
+// %var = memref.alloc() : memref<f32>
+// %priv = acc.private varPtr(%var : memref<f32>)
+// recipe(@priv_recipe) -> memref<f32>
+// acc.parallel private(%priv : memref<f32>) { ... }
+// }
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/OpenACC/Transforms/Passes.h"
+
+#include "mlir/Dialect/OpenACC/Analysis/OpenACCSupport.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinAttributes.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/IR/Value.h"
+#include "mlir/Interfaces/FunctionInterfaces.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/TypeSwitch.h"
+
+namespace mlir {
+namespace acc {
+#define GEN_PASS_DEF_ACCIMPLICITDECLARE
+#include "mlir/Dialect/OpenACC/Transforms/Passes.h.inc"
+} // namespace acc
+} // namespace mlir
+
+#define DEBUG_TYPE "acc-implicit-declare"
+
+using namespace mlir;
+
+namespace {
+
+using GlobalOpSetT = llvm::SmallSetVector<Operation *, 16>;
+
+/// Checks whether a use of the requested `globalOp` should be considered
+/// for hoisting out of acc region due to avoid `acc declare`ing something
+/// that instead should be implicitly mapped.
+static bool isGlobalUseCandidateForHoisting(Operation *globalOp,
+ Operation *user,
+ SymbolRefAttr symbol,
+ acc::OpenACCSupport &accSupport) {
+ if (accSupport.isValidSymbolUse(user, symbol)) {
+ // This symbol is valid in GPU region. This means semantics
+ // would change if moved to host - therefore it is not a candidate.
+ return false;
+ }
+
+ bool isConstant = false;
+ bool isFunction = false;
+
+ if (auto globalVarOp =
+ dyn_cast<mlir::acc::GlobalVariableOpInterface>(globalOp)) {
+ isConstant = globalVarOp.isConstant();
+ }
+
+ if (isa<mlir::FunctionOpInterface>(globalOp)) {
+ isFunction = true;
+ }
+
+ // Constants should be kept in device code to ensure they are duplicated.
+ // Function references should be kept in device code to ensure their device
+ // addresses are computed. Everything else should be hoisted since we already
+ // proved they are not valid symbols in GPU region.
+ return !isConstant && !isFunction;
+}
+
+/// Checks whether it is valid to use acc.declare marking on the global.
+bool isValidForAccDeclare(Operation *globalOp) {
+ // For functions - we use acc.routine marking instead.
+ return !isa<mlir::FunctionOpInterface>(globalOp);
+}
+
+/// Checks whether a recipe operation has meaningful use of its symbol that
+/// justifies processing its regions for global references. Returns false if:
+/// 1. The recipe has no symbol uses at all, or
+/// 2. The only symbol use is the recipe's own symbol definition
+template <typename RecipeOpT>
+static bool hasRelevantRecipeUse(RecipeOpT recipeOp) {
+ auto moduleOp = recipeOp->template getParentOfType<mlir::ModuleOp>();
+ std::optional<mlir::SymbolTable::UseRange> symbolUses =
+ recipeOp.getSymbolUses(moduleOp);
+
+ // No recipe symbol uses.
+ if (!symbolUses.has_value() || symbolUses->empty()) {
+ return false;
+ }
+
+ // If more than one use, assume it's used.
+ auto begin = symbolUses->begin();
+ auto end = symbolUses->end();
+ if (begin != end && std::next(begin) != end) {
+ return true;
+ }
+
+ // If single use, check if the use is the recipe itself.
+ const auto &use = *symbolUses->begin();
+ return use.getUser() != recipeOp.getOperation();
+}
+
+// Hoists addr_of operations for non-constant globals out of OpenACC regions.
+// This way - they are implicitly mapped instead of being considered for
+// implicit declare.
+template <typename AccConstructT>
+static void hoistNonConstantDirectUses(AccConstructT accOp,
+ acc::OpenACCSupport &accSupport) {
+ accOp.walk([&](mlir::acc::AddressOfGlobalOpInterface addrOfOp) {
+ SymbolRefAttr symRef = addrOfOp.getSymbol();
+ if (symRef) {
+ Operation *globalOp =
+ SymbolTable::lookupNearestSymbolFrom(addrOfOp, symRef);
+ if (isGlobalUseCandidateForHoisting(globalOp, addrOfOp, symRef,
+ accSupport)) {
+ addrOfOp->moveBefore(accOp);
+ LLVM_DEBUG(
+ llvm::dbgs() << "Hoisted:\n\t" << addrOfOp << "\n\tfrom:\n\t";
+ accOp->print(llvm::dbgs(),
+ OpPrintingFlags{}.skipRegions().enableDebugInfo());
+ llvm::dbgs() << "\n");
+ }
+ }
+ });
+}
+
+// Collects the globals referenced in a device region
+static void collectGlobalsFromDeviceRegion(mlir::Region ®ion,
+ GlobalOpSetT &globals,
+ acc::OpenACCSupport &accSupport) {
+ auto mod = region.getParentOfType<mlir::ModuleOp>();
+ auto symTab = SymbolTable(mod);
+ region.walk([&](Operation *op) {
+ // 1) Only consider relevant operations which use symbols
+ auto addrOfOp = dyn_cast<mlir::acc::AddressOfGlobalOpInterface>(op);
+ if (addrOfOp) {
+ SymbolRefAttr symRef = addrOfOp.getSymbol();
+ // 2) Found an operation which uses the symbol. Next determine if it
+ // is a candidate for `acc declare`. Some of the criteria considered
+ // is whether this symbol is not already a device one (either because
+ // acc declare is already used or this is a CUF global).
+ Operation *globalOp = nullptr;
+ bool isCandidate = !accSupport.isValidSymbolUse(op, symRef, &globalOp);
+ if (isCandidate && globalOp && isValidForAccDeclare(globalOp)) {
+ // 3) Add the candidate to the set of globals to be `acc declare`d.
+ globals.insert(globalOp);
+ }
+ } else if (auto indirectAccessOp =
+ dyn_cast<mlir::acc::IndirectGlobalAccessOpInterface>(op)) {
+ // Process operations that indirectly access globals
+ llvm::SmallVector<SymbolRefAttr> symbols;
+ indirectAccessOp.getReferencedSymbols(symbols, &symTab);
+ for (auto symRef : symbols) {
+ if (Operation *globalOp = SymbolTable::lookupSymbolIn(mod, symRef)) {
+ if (isValidForAccDeclare(globalOp)) {
+ globals.insert(globalOp);
+ }
+ }
+ }
+ }
+ });
+}
+
+// Adds the declare attribute to the operation `op`.
+static void addDeclareAttr(MLIRContext *context, Operation *op,
+ acc::DataClause clause) {
+ if (!op) {
+ return;
+ }
+ op->setAttr(acc::getDeclareAttrName(),
+ acc::DeclareAttr::get(context,
+ acc::DataClauseAttr::get(context, clause)));
+}
+
+// This pass applies implicit declare actions for globals referenced in
+// OpenACC compute and routine regions.
+class ACCImplicitDeclare
+ : public acc::impl::ACCImplicitDeclareBase<ACCImplicitDeclare> {
+public:
+ using ACCImplicitDeclareBase<ACCImplicitDeclare>::ACCImplicitDeclareBase;
+
+ void runOnOperation() override {
+ ModuleOp module = getOperation();
+ auto *context = &getContext();
+ acc::OpenACCSupport &accSupport = getAnalysis<acc::OpenACCSupport>();
+
+ // 1) Start off by hoisting any AddressOf operations out of acc region
+ // for any cases we do not want to `acc declare`. This is because we can
+ // rely on implicit data mapping in majority of cases without uselessly
+ // polluting the device globals.
+ module.walk([&](Operation *op) {
+ TypeSwitch<Operation *, void>(op)
+ .Case<ACC_COMPUTE_CONSTRUCT_OPS, acc::KernelEnvironmentOp>(
+ [&](auto accOp) {
+ hoistNonConstantDirectUses(accOp, accSupport);
+ });
+ });
+
+ // 2) Collect global symbols which need to be `acc declare`d. Do it for
+ // compute regions, acc routine, and existing globals with the declare
+ // attribute.
+ GlobalOpSetT globalsToAccDeclare;
+ module.walk([&](Operation *op) {
+ TypeSwitch<Operation *, void>(op)
+ .Case<ACC_COMPUTE_CONSTRUCT_OPS, acc::KernelEnvironmentOp>(
+ [&](auto accOp) {
+ collectGlobalsFromDeviceRegion(accOp.getRegion(),
+ globalsToAccDeclare, accSupport);
+ })
+ .Case<mlir::FunctionOpInterface>([&](auto func) {
+ if (acc::isAccRoutineOp(func) && !func.isExternal()) {
+ collectGlobalsFromDeviceRegion(func.getFunctionBody(),
+ globalsToAccDeclare, accSupport);
+ }
+ })
+ .Case<mlir::acc::GlobalVariableOpInterface>([&](auto globalVarOp) {
+ if (globalVarOp->getAttr(acc::getDeclareAttrName())) {
+ if (mlir::Region *initRegion = globalVarOp.getInitRegion()) {
+ collectGlobalsFromDeviceRegion(*initRegion, globalsToAccDeclare,
+ accSupport);
+ }
+ }
+ })
+ .Case<acc::PrivateRecipeOp>([&](auto privateRecipe) {
+ if (hasRelevantRecipeUse(privateRecipe)) {
+ collectGlobalsFromDeviceRegion(privateRecipe.getInitRegion(),
+ globalsToAccDeclare, accSupport);
+ collectGlobalsFromDeviceRegion(privateRecipe.getDestroyRegion(),
+ globalsToAccDeclare, accSupport);
+ }
+ })
+ .Case<acc::FirstprivateRecipeOp>([&](auto firstprivateRecipe) {
+ if (hasRelevantRecipeUse(firstprivateRecipe)) {
+ collectGlobalsFromDeviceRegion(firstprivateRecipe.getInitRegion(),
+ globalsToAccDeclare, accSupport);
+ collectGlobalsFromDeviceRegion(
+ firstprivateRecipe.getDestroyRegion(), globalsToAccDeclare,
+ accSupport);
+ collectGlobalsFromDeviceRegion(firstprivateRecipe.getCopyRegion(),
+ globalsToAccDeclare, accSupport);
+ }
+ })
+ .Case<acc::ReductionRecipeOp>([&](auto reductionRecipe) {
+ if (hasRelevantRecipeUse(reductionRecipe)) {
+ collectGlobalsFromDeviceRegion(reductionRecipe.getInitRegion(),
+ globalsToAccDeclare, accSupport);
+ collectGlobalsFromDeviceRegion(
+ reductionRecipe.getCombinerRegion(), globalsToAccDeclare,
+ accSupport);
+ }
+ });
+ });
+
+ // 3) Finally, generate the appropriate declare actions needed to ensure
+ // this is considered for device global.
+ for (auto *globalOp : globalsToAccDeclare) {
+ LLVM_DEBUG(
+ llvm::dbgs() << "Global is being `acc declare copyin`d: ";
+ globalOp->print(llvm::dbgs(),
+ OpPrintingFlags{}.skipRegions().enableDebugInfo());
+ llvm::dbgs() << "\n");
+
+ // Mark it as declare copyin.
+ addDeclareAttr(context, globalOp, acc::DataClause::acc_copyin);
+
+ // TODO: May need to create the global constructor which does the mapping
+ // action. It is not yet clear if this is needed yet (since the globals
+ // might just end up in the GPU image without requiring mapping via
+ // runtime).
+ }
+ }
+};
+
+} // namespace
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt b/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt
index 028af0362f26e..2c6da87c66a11 100644
--- a/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt
@@ -1,5 +1,6 @@
add_mlir_dialect_library(MLIROpenACCTransforms
ACCImplicitData.cpp
+ ACCImplicitDeclare.cpp
ACCImplicitRoutine.cpp
LegalizeDataValues.cpp
diff --git a/mlir/test/Dialect/OpenACC/acc-implicit-declare.mlir b/mlir/test/Dialect/OpenACC/acc-implicit-declare.mlir
new file mode 100644
index 0000000000000..74ff3384c093c
--- /dev/null
+++ b/mlir/test/Dialect/OpenACC/acc-implicit-declare.mlir
@@ -0,0 +1,175 @@
+// RUN: mlir-opt %s --pass-pipeline="builtin.module(acc-implicit-declare)" -split-input-file 2>&1 | FileCheck %s
+
+// -----
+
+// Test that non-constant scalar globals in compute regions are hoisted
+// instead of being marked with acc declare
+
+memref.global @gscalar : memref<f32> = dense<0.0>
+
+func.func @test_scalar_in_serial() {
+ acc.serial {
+ %addr = memref.get_global @gscalar : memref<f32>
+ %load = memref.load %addr[] : memref<f32>
+ acc.yield
+ }
+ return
+}
+
+// Expected to hoist this global access out of acc region instead of marking
+// with `acc declare`.
+// CHECK-LABEL: func.func @test_scalar_in_serial
+// CHECK: memref.get_global @gscalar
+// CHECK: acc.serial
+// CHECK-NOT: acc.declare
+
+// -----
+
+// Test that constant globals are marked with acc declare
+
+memref.global constant @gscalarconst : memref<f32> = dense<1.0>
+
+func.func @test_constant_in_serial() {
+ acc.serial {
+ %addr = memref.get_global @gscalarconst : memref<f32>
+ %load = memref.load %addr[] : memref<f32>
+ acc.yield
+ }
+ return
+}
+
+// This is expected to be `acc declare`'d since it is a constant.
+// CHECK: memref.global constant @gscalarconst {{.*}} {acc.declare = #acc.declare<dataClause = acc_copyin>}
+
+// -----
+
+// Test globals referenced in acc routine functions
+
+memref.global @gscalar_routine : memref<f32> = dense<0.0>
+
+acc.routine @acc_routine_0 func(@test_scalar_in_accroutine)
+func.func @test_scalar_in_accroutine() attributes {acc.routine_info = #acc.routine_info<[@acc_routine_0]>} {
+ %addr = memref.get_global @gscalar_routine : memref<f32>
+ %load = memref.load %addr[] : memref<f32>
+ return
+}
+
+// Global should be acc declare'd because it's in an acc routine
+// CHECK: memref.global @gscalar_routine {{.*}} {acc.declare = #acc.declare<dataClause = acc_copyin>}
+
+// -----
+
+// Test constant globals in acc routine
+
+memref.global constant @gscalarconst_routine : memref<f32> = dense<1.0>
+
+acc.routine @acc_routine_0 func(@test_constant_in_accroutine)
+func.func @test_constant_in_accroutine() attributes {acc.routine_info = #acc.routine_info<[@acc_routine_0]>} {
+ %addr = memref.get_global @gscalarconst_routine : memref<f32>
+ %load = memref.load %addr[] : memref<f32>
+ return
+}
+
+// CHECK: memref.global constant @gscalarconst_routine {{.*}} {acc.declare = #acc.declare<dataClause = acc_copyin>}
+
+// -----
+
+// Test acc.private.recipe with global reference - referenced variant
+
+memref.global @global_for_private : memref<f32> = dense<0.0>
+
+acc.private.recipe @private_recipe_with_global : memref<f32> init {
+^bb0(%arg0: memref<f32>):
+ %0 = memref.alloc() : memref<f32>
+ %global_addr = memref.get_global @global_for_private : memref<f32>
+ %global_val = memref.load %global_addr[] : memref<f32>
+ memref.store %global_val, %0[] : memref<f32>
+ acc.yield %0 : memref<f32>
+} destroy {
+^bb0(%arg0: memref<f32>):
+ memref.dealloc %arg0 : memref<f32>
+ acc.terminator
+}
+
+func.func @test_private_recipe_referenced() {
+ %var = memref.alloc() : memref<f32>
+ %priv = acc.private varPtr(%var : memref<f32>) recipe(@private_recipe_with_global) -> memref<f32>
+ acc.parallel private(%priv : memref<f32>) {
+ %load = memref.load %var[] : memref<f32>
+ acc.yield
+ }
+ memref.dealloc %var : memref<f32>
+ return
+}
+
+// Global should be acc declare'd because the recipe is referenced
+// CHECK: memref.global @global_for_private {{.*}} {acc.declare = #acc.declare<dataClause = acc_copyin>}
+
+// -----
+
+// Test acc.private.recipe with global reference - unreferenced variant
+
+memref.global @global_for_private_unused : memref<f32> = dense<0.0>
+
+acc.private.recipe @private_recipe_unused : memref<f32> init {
+^bb0(%arg0: memref<f32>):
+ %0 = memref.alloc() : memref<f32>
+ %global_addr = memref.get_global @global_for_private_unused : memref<f32>
+ %global_val = memref.load %global_addr[] : memref<f32>
+ memref.store %global_val, %0[] : memref<f32>
+ acc.yield %0 : memref<f32>
+} destroy {
+^bb0(%arg0: memref<f32>):
+ memref.dealloc %arg0 : memref<f32>
+ acc.terminator
+}
+
+func.func @test_private_recipe_not_referenced() {
+ %var = memref.alloc() : memref<f32>
+ acc.parallel {
+ %load = memref.load %var[] : memref<f32>
+ acc.yield
+ }
+ memref.dealloc %var : memref<f32>
+ return
+}
+
+// Global should NOT be acc declare'd because the recipe is not referenced
+// CHECK-NOT: memref.global @global_for_private_unused {{.*}} {acc.declare
+
+// -----
+
+// Test globals in different compute constructs (parallel, kernels, serial)
+
+memref.global @global_parallel : memref<f32> = dense<0.0>
+memref.global @global_kernels : memref<f32> = dense<0.0>
+memref.global constant @global_serial_const : memref<f32> = dense<1.0>
+
+func.func @test_multiple_constructs() {
+ acc.parallel {
+ %addr = memref.get_global @global_parallel : memref<f32>
+ %load = memref.load %addr[] : memref<f32>
+ acc.yield
+ }
+ acc.kernels {
+ %addr = memref.get_global @global_kernels : memref<f32>
+ %load = memref.load %addr[] : memref<f32>
+ acc.terminator
+ }
+ acc.serial {
+ %addr = memref.get_global @global_serial_const : memref<f32>
+ %load = memref.load %addr[] : memref<f32>
+ acc.yield
+ }
+ return
+}
+
+// Non-constant globals ARE hoisted before their compute regions
+// Constant global should be marked with acc.declare
+// CHECK: memref.global constant @global_serial_const {{.*}} {acc.declare = #acc.declare<dataClause = acc_copyin>}
+// CHECK-LABEL: func.func @test_multiple_constructs
+// CHECK: memref.get_global @global_parallel
+// CHECK-NEXT: acc.parallel
+// CHECK: memref.get_global @global_kernels
+// CHECK-NEXT: acc.kernels
+
>From 4db3a56f60765f89c0db221e6eebe1fc23d63a0b Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Wed, 26 Nov 2025 12:25:09 -0800
Subject: [PATCH 02/10] Fix braces
---
.../OpenACC/Transforms/ACCImplicitDeclare.cpp | 46 +++++++------------
1 file changed, 17 insertions(+), 29 deletions(-)
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
index 6585b96762326..9e87067f990d8 100644
--- a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
+++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
@@ -209,23 +209,20 @@ static bool isGlobalUseCandidateForHoisting(Operation *globalOp,
Operation *user,
SymbolRefAttr symbol,
acc::OpenACCSupport &accSupport) {
- if (accSupport.isValidSymbolUse(user, symbol)) {
- // This symbol is valid in GPU region. This means semantics
- // would change if moved to host - therefore it is not a candidate.
+ // This symbol is valid in GPU region. This means semantics
+ // would change if moved to host - therefore it is not a candidate.
+ if (accSupport.isValidSymbolUse(user, symbol))
return false;
- }
bool isConstant = false;
bool isFunction = false;
if (auto globalVarOp =
- dyn_cast<mlir::acc::GlobalVariableOpInterface>(globalOp)) {
+ dyn_cast<mlir::acc::GlobalVariableOpInterface>(globalOp))
isConstant = globalVarOp.isConstant();
- }
- if (isa<mlir::FunctionOpInterface>(globalOp)) {
+ if (isa<mlir::FunctionOpInterface>(globalOp))
isFunction = true;
- }
// Constants should be kept in device code to ensure they are duplicated.
// Function references should be kept in device code to ensure their device
@@ -251,16 +248,14 @@ static bool hasRelevantRecipeUse(RecipeOpT recipeOp) {
recipeOp.getSymbolUses(moduleOp);
// No recipe symbol uses.
- if (!symbolUses.has_value() || symbolUses->empty()) {
+ if (!symbolUses.has_value() || symbolUses->empty())
return false;
- }
// If more than one use, assume it's used.
auto begin = symbolUses->begin();
auto end = symbolUses->end();
- if (begin != end && std::next(begin) != end) {
+ if (begin != end && std::next(begin) != end)
return true;
- }
// If single use, check if the use is the recipe itself.
const auto &use = *symbolUses->begin();
@@ -308,22 +303,18 @@ static void collectGlobalsFromDeviceRegion(mlir::Region ®ion,
// acc declare is already used or this is a CUF global).
Operation *globalOp = nullptr;
bool isCandidate = !accSupport.isValidSymbolUse(op, symRef, &globalOp);
- if (isCandidate && globalOp && isValidForAccDeclare(globalOp)) {
- // 3) Add the candidate to the set of globals to be `acc declare`d.
+ // 3) Add the candidate to the set of globals to be `acc declare`d.
+ if (isCandidate && globalOp && isValidForAccDeclare(globalOp))
globals.insert(globalOp);
- }
} else if (auto indirectAccessOp =
dyn_cast<mlir::acc::IndirectGlobalAccessOpInterface>(op)) {
// Process operations that indirectly access globals
llvm::SmallVector<SymbolRefAttr> symbols;
indirectAccessOp.getReferencedSymbols(symbols, &symTab);
- for (auto symRef : symbols) {
- if (Operation *globalOp = SymbolTable::lookupSymbolIn(mod, symRef)) {
- if (isValidForAccDeclare(globalOp)) {
+ for (auto symRef : symbols)
+ if (Operation *globalOp = SymbolTable::lookupSymbolIn(mod, symRef))
+ if (isValidForAccDeclare(globalOp))
globals.insert(globalOp);
- }
- }
- }
}
});
}
@@ -331,9 +322,9 @@ static void collectGlobalsFromDeviceRegion(mlir::Region ®ion,
// Adds the declare attribute to the operation `op`.
static void addDeclareAttr(MLIRContext *context, Operation *op,
acc::DataClause clause) {
- if (!op) {
+ if (!op)
return;
- }
+
op->setAttr(acc::getDeclareAttrName(),
acc::DeclareAttr::get(context,
acc::DataClauseAttr::get(context, clause)));
@@ -375,18 +366,15 @@ class ACCImplicitDeclare
globalsToAccDeclare, accSupport);
})
.Case<mlir::FunctionOpInterface>([&](auto func) {
- if (acc::isAccRoutineOp(func) && !func.isExternal()) {
+ if (acc::isAccRoutineOp(func) && !func.isExternal())
collectGlobalsFromDeviceRegion(func.getFunctionBody(),
globalsToAccDeclare, accSupport);
- }
})
.Case<mlir::acc::GlobalVariableOpInterface>([&](auto globalVarOp) {
- if (globalVarOp->getAttr(acc::getDeclareAttrName())) {
- if (mlir::Region *initRegion = globalVarOp.getInitRegion()) {
+ if (globalVarOp->getAttr(acc::getDeclareAttrName()))
+ if (mlir::Region *initRegion = globalVarOp.getInitRegion())
collectGlobalsFromDeviceRegion(*initRegion, globalsToAccDeclare,
accSupport);
- }
- }
})
.Case<acc::PrivateRecipeOp>([&](auto privateRecipe) {
if (hasRelevantRecipeUse(privateRecipe)) {
>From 9491a361f1cb9c3e6c156b49582e8fd444dbb83e Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Wed, 26 Nov 2025 12:25:31 -0800
Subject: [PATCH 03/10] Remove unneeded null check
---
mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp | 3 ---
1 file changed, 3 deletions(-)
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
index 9e87067f990d8..430c4f14e5e0a 100644
--- a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
+++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
@@ -322,9 +322,6 @@ static void collectGlobalsFromDeviceRegion(mlir::Region ®ion,
// Adds the declare attribute to the operation `op`.
static void addDeclareAttr(MLIRContext *context, Operation *op,
acc::DataClause clause) {
- if (!op)
- return;
-
op->setAttr(acc::getDeclareAttrName(),
acc::DeclareAttr::get(context,
acc::DataClauseAttr::get(context, clause)));
>From c84e5f9bd5e0dce1923d336418b838253f529695 Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Wed, 26 Nov 2025 12:26:26 -0800
Subject: [PATCH 04/10] Rename module to mod
---
mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
index 430c4f14e5e0a..225e9012f88f7 100644
--- a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
+++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
@@ -335,7 +335,7 @@ class ACCImplicitDeclare
using ACCImplicitDeclareBase<ACCImplicitDeclare>::ACCImplicitDeclareBase;
void runOnOperation() override {
- ModuleOp module = getOperation();
+ ModuleOp mod = getOperation();
auto *context = &getContext();
acc::OpenACCSupport &accSupport = getAnalysis<acc::OpenACCSupport>();
@@ -343,7 +343,7 @@ class ACCImplicitDeclare
// for any cases we do not want to `acc declare`. This is because we can
// rely on implicit data mapping in majority of cases without uselessly
// polluting the device globals.
- module.walk([&](Operation *op) {
+ mod.walk([&](Operation *op) {
TypeSwitch<Operation *, void>(op)
.Case<ACC_COMPUTE_CONSTRUCT_OPS, acc::KernelEnvironmentOp>(
[&](auto accOp) {
@@ -355,7 +355,7 @@ class ACCImplicitDeclare
// compute regions, acc routine, and existing globals with the declare
// attribute.
GlobalOpSetT globalsToAccDeclare;
- module.walk([&](Operation *op) {
+ mod.walk([&](Operation *op) {
TypeSwitch<Operation *, void>(op)
.Case<ACC_COMPUTE_CONSTRUCT_OPS, acc::KernelEnvironmentOp>(
[&](auto accOp) {
>From 9cec252dc64eff3a0185073012b6732a7c91f2ac Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Wed, 26 Nov 2025 12:28:50 -0800
Subject: [PATCH 05/10] Spell out auto
---
.../lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
index 225e9012f88f7..b5b1ac40db0c0 100644
--- a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
+++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
@@ -258,7 +258,7 @@ static bool hasRelevantRecipeUse(RecipeOpT recipeOp) {
return true;
// If single use, check if the use is the recipe itself.
- const auto &use = *symbolUses->begin();
+ const SymbolTable::SymbolUse &use = *symbolUses->begin();
return use.getUser() != recipeOp.getOperation();
}
@@ -311,7 +311,7 @@ static void collectGlobalsFromDeviceRegion(mlir::Region ®ion,
// Process operations that indirectly access globals
llvm::SmallVector<SymbolRefAttr> symbols;
indirectAccessOp.getReferencedSymbols(symbols, &symTab);
- for (auto symRef : symbols)
+ for (SymbolRefAttr symRef : symbols)
if (Operation *globalOp = SymbolTable::lookupSymbolIn(mod, symRef))
if (isValidForAccDeclare(globalOp))
globals.insert(globalOp);
@@ -336,7 +336,7 @@ class ACCImplicitDeclare
void runOnOperation() override {
ModuleOp mod = getOperation();
- auto *context = &getContext();
+ MLIRContext *context = &getContext();
acc::OpenACCSupport &accSupport = getAnalysis<acc::OpenACCSupport>();
// 1) Start off by hoisting any AddressOf operations out of acc region
@@ -405,7 +405,7 @@ class ACCImplicitDeclare
// 3) Finally, generate the appropriate declare actions needed to ensure
// this is considered for device global.
- for (auto *globalOp : globalsToAccDeclare) {
+ for (Operation *globalOp : globalsToAccDeclare) {
LLVM_DEBUG(
llvm::dbgs() << "Global is being `acc declare copyin`d: ";
globalOp->print(llvm::dbgs(),
>From a907c2ae4d6343965d5981d049e152c4a7bd9f97 Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Wed, 26 Nov 2025 12:33:58 -0800
Subject: [PATCH 06/10] Pass symbol table from caller
---
.../OpenACC/Transforms/ACCImplicitDeclare.cpp | 33 +++++++++++--------
1 file changed, 20 insertions(+), 13 deletions(-)
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
index b5b1ac40db0c0..7182367db292d 100644
--- a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
+++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
@@ -289,9 +289,9 @@ static void hoistNonConstantDirectUses(AccConstructT accOp,
// Collects the globals referenced in a device region
static void collectGlobalsFromDeviceRegion(mlir::Region ®ion,
GlobalOpSetT &globals,
- acc::OpenACCSupport &accSupport) {
+ acc::OpenACCSupport &accSupport,
+ SymbolTable &symTab) {
auto mod = region.getParentOfType<mlir::ModuleOp>();
- auto symTab = SymbolTable(mod);
region.walk([&](Operation *op) {
// 1) Only consider relevant operations which use symbols
auto addrOfOp = dyn_cast<mlir::acc::AddressOfGlobalOpInterface>(op);
@@ -354,51 +354,58 @@ class ACCImplicitDeclare
// 2) Collect global symbols which need to be `acc declare`d. Do it for
// compute regions, acc routine, and existing globals with the declare
// attribute.
+ SymbolTable symTab(mod);
GlobalOpSetT globalsToAccDeclare;
mod.walk([&](Operation *op) {
TypeSwitch<Operation *, void>(op)
.Case<ACC_COMPUTE_CONSTRUCT_OPS, acc::KernelEnvironmentOp>(
[&](auto accOp) {
- collectGlobalsFromDeviceRegion(accOp.getRegion(),
- globalsToAccDeclare, accSupport);
+ collectGlobalsFromDeviceRegion(
+ accOp.getRegion(), globalsToAccDeclare, accSupport, symTab);
})
.Case<mlir::FunctionOpInterface>([&](auto func) {
if (acc::isAccRoutineOp(func) && !func.isExternal())
collectGlobalsFromDeviceRegion(func.getFunctionBody(),
- globalsToAccDeclare, accSupport);
+ globalsToAccDeclare, accSupport,
+ symTab);
})
.Case<mlir::acc::GlobalVariableOpInterface>([&](auto globalVarOp) {
if (globalVarOp->getAttr(acc::getDeclareAttrName()))
if (mlir::Region *initRegion = globalVarOp.getInitRegion())
collectGlobalsFromDeviceRegion(*initRegion, globalsToAccDeclare,
- accSupport);
+ accSupport, symTab);
})
.Case<acc::PrivateRecipeOp>([&](auto privateRecipe) {
if (hasRelevantRecipeUse(privateRecipe)) {
collectGlobalsFromDeviceRegion(privateRecipe.getInitRegion(),
- globalsToAccDeclare, accSupport);
+ globalsToAccDeclare, accSupport,
+ symTab);
collectGlobalsFromDeviceRegion(privateRecipe.getDestroyRegion(),
- globalsToAccDeclare, accSupport);
+ globalsToAccDeclare, accSupport,
+ symTab);
}
})
.Case<acc::FirstprivateRecipeOp>([&](auto firstprivateRecipe) {
if (hasRelevantRecipeUse(firstprivateRecipe)) {
collectGlobalsFromDeviceRegion(firstprivateRecipe.getInitRegion(),
- globalsToAccDeclare, accSupport);
+ globalsToAccDeclare, accSupport,
+ symTab);
collectGlobalsFromDeviceRegion(
firstprivateRecipe.getDestroyRegion(), globalsToAccDeclare,
- accSupport);
+ accSupport, symTab);
collectGlobalsFromDeviceRegion(firstprivateRecipe.getCopyRegion(),
- globalsToAccDeclare, accSupport);
+ globalsToAccDeclare, accSupport,
+ symTab);
}
})
.Case<acc::ReductionRecipeOp>([&](auto reductionRecipe) {
if (hasRelevantRecipeUse(reductionRecipe)) {
collectGlobalsFromDeviceRegion(reductionRecipe.getInitRegion(),
- globalsToAccDeclare, accSupport);
+ globalsToAccDeclare, accSupport,
+ symTab);
collectGlobalsFromDeviceRegion(
reductionRecipe.getCombinerRegion(), globalsToAccDeclare,
- accSupport);
+ accSupport, symTab);
}
});
});
>From 52a5123be7372d2872d75b573c9e5572793a8bf9 Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Wed, 26 Nov 2025 12:44:55 -0800
Subject: [PATCH 07/10] Lookup directly in symbol table
---
mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
index 7182367db292d..1cb27d9846851 100644
--- a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
+++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
@@ -291,7 +291,6 @@ static void collectGlobalsFromDeviceRegion(mlir::Region ®ion,
GlobalOpSetT &globals,
acc::OpenACCSupport &accSupport,
SymbolTable &symTab) {
- auto mod = region.getParentOfType<mlir::ModuleOp>();
region.walk([&](Operation *op) {
// 1) Only consider relevant operations which use symbols
auto addrOfOp = dyn_cast<mlir::acc::AddressOfGlobalOpInterface>(op);
@@ -312,7 +311,8 @@ static void collectGlobalsFromDeviceRegion(mlir::Region ®ion,
llvm::SmallVector<SymbolRefAttr> symbols;
indirectAccessOp.getReferencedSymbols(symbols, &symTab);
for (SymbolRefAttr symRef : symbols)
- if (Operation *globalOp = SymbolTable::lookupSymbolIn(mod, symRef))
+ if (auto globalOp = symTab.lookup<mlir::acc::GlobalVariableOpInterface>(
+ symRef.getLeafReference()))
if (isValidForAccDeclare(globalOp))
globals.insert(globalOp);
}
>From b12abe194b579c47199e3e7bbbf108a64b8c6304 Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Wed, 26 Nov 2025 12:46:22 -0800
Subject: [PATCH 08/10] Remove mlir:: use to make things consistent
---
.../OpenACC/Transforms/ACCImplicitDeclare.cpp | 27 +++++++++----------
1 file changed, 13 insertions(+), 14 deletions(-)
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
index 1cb27d9846851..2ad6311270534 100644
--- a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
+++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
@@ -217,11 +217,10 @@ static bool isGlobalUseCandidateForHoisting(Operation *globalOp,
bool isConstant = false;
bool isFunction = false;
- if (auto globalVarOp =
- dyn_cast<mlir::acc::GlobalVariableOpInterface>(globalOp))
+ if (auto globalVarOp = dyn_cast<acc::GlobalVariableOpInterface>(globalOp))
isConstant = globalVarOp.isConstant();
- if (isa<mlir::FunctionOpInterface>(globalOp))
+ if (isa<FunctionOpInterface>(globalOp))
isFunction = true;
// Constants should be kept in device code to ensure they are duplicated.
@@ -234,7 +233,7 @@ static bool isGlobalUseCandidateForHoisting(Operation *globalOp,
/// Checks whether it is valid to use acc.declare marking on the global.
bool isValidForAccDeclare(Operation *globalOp) {
// For functions - we use acc.routine marking instead.
- return !isa<mlir::FunctionOpInterface>(globalOp);
+ return !isa<FunctionOpInterface>(globalOp);
}
/// Checks whether a recipe operation has meaningful use of its symbol that
@@ -243,8 +242,8 @@ bool isValidForAccDeclare(Operation *globalOp) {
/// 2. The only symbol use is the recipe's own symbol definition
template <typename RecipeOpT>
static bool hasRelevantRecipeUse(RecipeOpT recipeOp) {
- auto moduleOp = recipeOp->template getParentOfType<mlir::ModuleOp>();
- std::optional<mlir::SymbolTable::UseRange> symbolUses =
+ auto moduleOp = recipeOp->template getParentOfType<ModuleOp>();
+ std::optional<SymbolTable::UseRange> symbolUses =
recipeOp.getSymbolUses(moduleOp);
// No recipe symbol uses.
@@ -268,7 +267,7 @@ static bool hasRelevantRecipeUse(RecipeOpT recipeOp) {
template <typename AccConstructT>
static void hoistNonConstantDirectUses(AccConstructT accOp,
acc::OpenACCSupport &accSupport) {
- accOp.walk([&](mlir::acc::AddressOfGlobalOpInterface addrOfOp) {
+ accOp.walk([&](acc::AddressOfGlobalOpInterface addrOfOp) {
SymbolRefAttr symRef = addrOfOp.getSymbol();
if (symRef) {
Operation *globalOp =
@@ -287,13 +286,13 @@ static void hoistNonConstantDirectUses(AccConstructT accOp,
}
// Collects the globals referenced in a device region
-static void collectGlobalsFromDeviceRegion(mlir::Region ®ion,
+static void collectGlobalsFromDeviceRegion(Region ®ion,
GlobalOpSetT &globals,
acc::OpenACCSupport &accSupport,
SymbolTable &symTab) {
region.walk([&](Operation *op) {
// 1) Only consider relevant operations which use symbols
- auto addrOfOp = dyn_cast<mlir::acc::AddressOfGlobalOpInterface>(op);
+ auto addrOfOp = dyn_cast<acc::AddressOfGlobalOpInterface>(op);
if (addrOfOp) {
SymbolRefAttr symRef = addrOfOp.getSymbol();
// 2) Found an operation which uses the symbol. Next determine if it
@@ -306,12 +305,12 @@ static void collectGlobalsFromDeviceRegion(mlir::Region ®ion,
if (isCandidate && globalOp && isValidForAccDeclare(globalOp))
globals.insert(globalOp);
} else if (auto indirectAccessOp =
- dyn_cast<mlir::acc::IndirectGlobalAccessOpInterface>(op)) {
+ dyn_cast<acc::IndirectGlobalAccessOpInterface>(op)) {
// Process operations that indirectly access globals
llvm::SmallVector<SymbolRefAttr> symbols;
indirectAccessOp.getReferencedSymbols(symbols, &symTab);
for (SymbolRefAttr symRef : symbols)
- if (auto globalOp = symTab.lookup<mlir::acc::GlobalVariableOpInterface>(
+ if (auto globalOp = symTab.lookup<acc::GlobalVariableOpInterface>(
symRef.getLeafReference()))
if (isValidForAccDeclare(globalOp))
globals.insert(globalOp);
@@ -363,15 +362,15 @@ class ACCImplicitDeclare
collectGlobalsFromDeviceRegion(
accOp.getRegion(), globalsToAccDeclare, accSupport, symTab);
})
- .Case<mlir::FunctionOpInterface>([&](auto func) {
+ .Case<FunctionOpInterface>([&](auto func) {
if (acc::isAccRoutineOp(func) && !func.isExternal())
collectGlobalsFromDeviceRegion(func.getFunctionBody(),
globalsToAccDeclare, accSupport,
symTab);
})
- .Case<mlir::acc::GlobalVariableOpInterface>([&](auto globalVarOp) {
+ .Case<acc::GlobalVariableOpInterface>([&](auto globalVarOp) {
if (globalVarOp->getAttr(acc::getDeclareAttrName()))
- if (mlir::Region *initRegion = globalVarOp.getInitRegion())
+ if (Region *initRegion = globalVarOp.getInitRegion())
collectGlobalsFromDeviceRegion(*initRegion, globalsToAccDeclare,
accSupport, symTab);
})
>From 1ce6cda125ca9989286d91027150ad007b6ebd86 Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Wed, 26 Nov 2025 12:51:29 -0800
Subject: [PATCH 09/10] Pass in module from parent to hasRelevantRecipeUse
---
.../Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
index 2ad6311270534..d9fb08a8b0809 100644
--- a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
+++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
@@ -241,10 +241,9 @@ bool isValidForAccDeclare(Operation *globalOp) {
/// 1. The recipe has no symbol uses at all, or
/// 2. The only symbol use is the recipe's own symbol definition
template <typename RecipeOpT>
-static bool hasRelevantRecipeUse(RecipeOpT recipeOp) {
- auto moduleOp = recipeOp->template getParentOfType<ModuleOp>();
+static bool hasRelevantRecipeUse(RecipeOpT &recipeOp, ModuleOp &mod) {
std::optional<SymbolTable::UseRange> symbolUses =
- recipeOp.getSymbolUses(moduleOp);
+ recipeOp.getSymbolUses(mod);
// No recipe symbol uses.
if (!symbolUses.has_value() || symbolUses->empty())
@@ -375,7 +374,7 @@ class ACCImplicitDeclare
accSupport, symTab);
})
.Case<acc::PrivateRecipeOp>([&](auto privateRecipe) {
- if (hasRelevantRecipeUse(privateRecipe)) {
+ if (hasRelevantRecipeUse(privateRecipe, mod)) {
collectGlobalsFromDeviceRegion(privateRecipe.getInitRegion(),
globalsToAccDeclare, accSupport,
symTab);
@@ -385,7 +384,7 @@ class ACCImplicitDeclare
}
})
.Case<acc::FirstprivateRecipeOp>([&](auto firstprivateRecipe) {
- if (hasRelevantRecipeUse(firstprivateRecipe)) {
+ if (hasRelevantRecipeUse(firstprivateRecipe, mod)) {
collectGlobalsFromDeviceRegion(firstprivateRecipe.getInitRegion(),
globalsToAccDeclare, accSupport,
symTab);
@@ -398,7 +397,7 @@ class ACCImplicitDeclare
}
})
.Case<acc::ReductionRecipeOp>([&](auto reductionRecipe) {
- if (hasRelevantRecipeUse(reductionRecipe)) {
+ if (hasRelevantRecipeUse(reductionRecipe, mod)) {
collectGlobalsFromDeviceRegion(reductionRecipe.getInitRegion(),
globalsToAccDeclare, accSupport,
symTab);
>From b16d0c242884b0db2f3ad7cd42235aebc7726d07 Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Wed, 26 Nov 2025 12:52:37 -0800
Subject: [PATCH 10/10] Use Operation* on lookup
---
mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
index d9fb08a8b0809..67d88a85374cd 100644
--- a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
+++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
@@ -309,8 +309,7 @@ static void collectGlobalsFromDeviceRegion(Region ®ion,
llvm::SmallVector<SymbolRefAttr> symbols;
indirectAccessOp.getReferencedSymbols(symbols, &symTab);
for (SymbolRefAttr symRef : symbols)
- if (auto globalOp = symTab.lookup<acc::GlobalVariableOpInterface>(
- symRef.getLeafReference()))
+ if (Operation *globalOp = symTab.lookup(symRef.getLeafReference()))
if (isValidForAccDeclare(globalOp))
globals.insert(globalOp);
}
More information about the Mlir-commits
mailing list