[Mlir-commits] [mlir] [mlir][acc] Introduce ACCImplicitDeclare pass for globals handling (PR #169720)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Wed Nov 26 12:10:24 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir
Author: Razvan Lupusoru (razvanlupusoru)
<details>
<summary>Changes</summary>
This commit introduces the ACCImplicitDeclare pass to the OpenACC dialect, complementing ACCImplicitData by handling global variables referenced in OpenACC compute regions and routines.
Overview:
---------
The pass applies implicit `acc declare` actions to global variables referenced in OpenACC regions. While the OpenACC spec focuses on implicit data mapping (handled by ACCImplicitData), implicit declare is advantageous and required for specific cases:
1. Globals referenced in implicit `acc routine` - Since data mapping only applies to compute regions, globals in routines must use `acc declare`.
2. Compiler-generated globals - Type descriptors, runtime names, and error reporting strings introduced during compilation that wouldn't be visible for user-provided `acc declare` directives.
3. Constant globals - Constants like filename strings or initialization values benefit from being marked with `acc declare` rather than being mapped repeatedly (e.g., 1000 kernel launches shouldn't map the same constant 1000 times).
Implementation:
---------------
The pass performs this in two phases:
1. Hoisting: Non-constant globals in compute regions have their address-of operations hoisted out of the region when possible, allowing implicit data mapping instead of declare marking.
2. Declaration: Remaining that must be device available (constants, globals in routines, globals in recipe operations) are marked with the acc.declare attribute.
The pass processes:
- OpenACC compute constructs (parallel, kernels, serial)
- Functions marked with acc routine
- Private, firstprivate, and reduction recipes (when used)
- Initialization regions of existing declared globals
Requirements:
-------------
The pass requires operations to implement:
- acc::AddressOfGlobalOpInterface (for address-of ops)
- acc::GlobalVariableOpInterface (for global definitions)
- acc::IndirectGlobalAccessOpInterface (for indirect access)
---
Patch is 27.00 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169720.diff
5 Files Affected:
- (modified) mlir/include/mlir/Dialect/OpenACC/OpenACC.h (+5)
- (modified) mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td (+28)
- (added) mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp (+441)
- (modified) mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt (+1)
- (added) mlir/test/Dialect/OpenACC/acc-implicit-declare.mlir (+175)
``````````diff
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
index 8571a2d9d0939..252a78648dd74 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
@@ -180,6 +180,11 @@ static constexpr StringLiteral getRoutineInfoAttrName() {
return StringLiteral("acc.routine_info");
}
+/// Used to check whether the current operation is an `acc routine`
+inline bool isAccRoutineOp(mlir::Operation *op) {
+ return op->hasAttr(mlir::acc::getRoutineInfoAttrName());
+}
+
static constexpr StringLiteral getFromDefaultClauseAttrName() {
return StringLiteral("acc.from_default");
}
diff --git a/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td b/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td
index cad78df2fbb0b..713aaabee65f0 100644
--- a/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td
@@ -63,6 +63,34 @@ def ACCImplicitData : Pass<"acc-implicit-data", "mlir::ModuleOp"> {
];
}
+def ACCImplicitDeclare : Pass<"acc-implicit-declare", "mlir::ModuleOp"> {
+ let summary = "Applies implicit acc declare to globals referenced in compute and routine acc regions";
+ let description = [{
+ This pass applies implicit `acc declare` actions to global variables
+ referenced in OpenACC compute regions and routine functions.
+
+ The pass performs the following actions:
+
+ 1. Hoists address-of operations for non-constant globals out of OpenACC
+ regions when they can be implicitly mapped rather than declared.
+
+ 2. Collects global symbols referenced in:
+ - OpenACC compute constructs (parallel, kernels, serial)
+ - Functions marked with acc routine
+ - Initialization regions of existing acc declare globals
+ - Private/firstprivate/reduction recipe operations
+
+ 3. Marks collected globals with the acc.declare attribute using the
+ copyin data clause.
+
+ The pass avoids unnecessary declare marking by:
+ - Skipping function symbols (which use acc routine instead)
+ - Hoisting non-constant global references that can use implicit mapping
+ - Only processing symbols that are not already valid in device regions
+ }];
+ let dependentDialects = ["mlir::acc::OpenACCDialect"];
+}
+
def ACCImplicitRoutine : Pass<"acc-implicit-routine", "mlir::ModuleOp"> {
let summary = "Generate implicit acc routine for functions in acc regions";
let description = [{
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
new file mode 100644
index 0000000000000..6585b96762326
--- /dev/null
+++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitDeclare.cpp
@@ -0,0 +1,441 @@
+//===- ACCImplicitDeclare.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass applies implicit `acc declare` actions to global variables
+// referenced in OpenACC compute regions and routine functions.
+//
+// Overview:
+// ---------
+// Global references in an acc regions (for globals not marked with `acc
+// declare` by the user) can be handled in one of two ways:
+// - Mapped through data clauses
+// - Implicitly marked as `acc declare` (this pass)
+//
+// Thus, the OpenACC specification focuses solely on implicit data mapping rules
+// whose implementation is captured in `ACCImplicitData` pass.
+//
+// However, it is both advantageous and required for certain cases to
+// use implicit `acc declare` instead:
+// - Any functions that are implicitly marked as `acc routine` through
+// `ACCImplicitRoutine` may reference globals. Since data mapping
+// is only possible for compute regions, such globals can only be
+// made available on device through `acc declare`.
+// - Compiler can generate and use globals for cases needed in IR
+// representation such as type descriptors or various names needed for
+// runtime calls and error reporting - such cases often are introduced
+// after a frontend semantic checking is done since it is related to
+// implementation detail. Thus, such compiler generated globals would
+// not have been visible for a user to mark with `acc declare`.
+// - Constant globals such as filename strings or data initialization values
+// are values that do not get mutated but are still needed for appropriate
+// runtime execution. If a kernel is launched 1000 times, it is not a
+// good idea to map such a global 1000 times. Therefore, such globals
+// benefit from being marked with `acc declare`.
+//
+// This pass automatically
+// marks global variables with the `acc.declare` attribute when they are
+// referenced in OpenACC compute constructs or routine functions and meet
+// the criteria noted above, ensuring
+// they are properly handled for device execution.
+//
+// The pass performs two main optimizations:
+//
+// 1. Hoisting: For non-constant globals referenced in compute regions, the
+// pass hoists the address-of operation out of the region when possible,
+// allowing them to be implicitly mapped through normal data clause
+// mechanisms rather than requiring declare marking.
+//
+// 2. Declaration: For globals that must be available on the device (constants,
+// globals in routines, globals in recipe operations), the pass adds the
+// `acc.declare` attribute with the copyin data clause.
+//
+// Requirements:
+// -------------
+// To use this pass in a pipeline, the following requirements must be met:
+//
+// 1. Operation Interface Implementation: Operations that compute addresses
+// of global variables must implement the `acc::AddressOfGlobalOpInterface`
+// and those that represent globals must implement the
+// `acc::GlobalOpInterface`. Additionally, any operations that indirectly
+// access globals must implement the `acc::IndirectGlobalAccessOpInterface`.
+//
+// 2. Analysis Registration (Optional): If custom behavior is needed for
+// determining if a symbol use is valid within GPU regions, the dialect
+// should pre-register the `acc::OpenACCSupport` analysis.
+//
+// Examples:
+// ---------
+//
+// Example 1: Non-constant global in compute region (hoisted)
+//
+// Before:
+// memref.global @g_scalar : memref<f32> = dense<0.0>
+// func.func @test() {
+// acc.serial {
+// %addr = memref.get_global @g_scalar : memref<f32>
+// %val = memref.load %addr[] : memref<f32>
+// acc.yield
+// }
+// }
+//
+// After:
+// memref.global @g_scalar : memref<f32> = dense<0.0>
+// func.func @test() {
+// %addr = memref.get_global @g_scalar : memref<f32>
+// acc.serial {
+// %val = memref.load %addr[] : memref<f32>
+// acc.yield
+// }
+// }
+//
+// Example 2: Constant global in compute region (declared)
+//
+// Before:
+// memref.global constant @g_const : memref<f32> = dense<1.0>
+// func.func @test() {
+// acc.serial {
+// %addr = memref.get_global @g_const : memref<f32>
+// %val = memref.load %addr[] : memref<f32>
+// acc.yield
+// }
+// }
+//
+// After:
+// memref.global constant @g_const : memref<f32> = dense<1.0>
+// {acc.declare = #acc.declare<dataClause = acc_copyin>}
+// func.func @test() {
+// acc.serial {
+// %addr = memref.get_global @g_const : memref<f32>
+// %val = memref.load %addr[] : memref<f32>
+// acc.yield
+// }
+// }
+//
+// Example 3: Global in acc routine (declared)
+//
+// Before:
+// memref.global @g_data : memref<f32> = dense<0.0>
+// acc.routine @routine_0 func(@device_func)
+// func.func @device_func() attributes {acc.routine_info = ...} {
+// %addr = memref.get_global @g_data : memref<f32>
+// %val = memref.load %addr[] : memref<f32>
+// }
+//
+// After:
+// memref.global @g_data : memref<f32> = dense<0.0>
+// {acc.declare = #acc.declare<dataClause = acc_copyin>}
+// acc.routine @routine_0 func(@device_func)
+// func.func @device_func() attributes {acc.routine_info = ...} {
+// %addr = memref.get_global @g_data : memref<f32>
+// %val = memref.load %addr[] : memref<f32>
+// }
+//
+// Example 4: Global in private recipe (declared if recipe is used)
+//
+// Before:
+// memref.global @g_init : memref<f32> = dense<0.0>
+// acc.private.recipe @priv_recipe : memref<f32> init {
+// ^bb0(%arg0: memref<f32>):
+// %alloc = memref.alloc() : memref<f32>
+// %global = memref.get_global @g_init : memref<f32>
+// %val = memref.load %global[] : memref<f32>
+// memref.store %val, %alloc[] : memref<f32>
+// acc.yield %alloc : memref<f32>
+// } destroy { ... }
+// func.func @test() {
+// %var = memref.alloc() : memref<f32>
+// %priv = acc.private varPtr(%var : memref<f32>)
+// recipe(@priv_recipe) -> memref<f32>
+// acc.parallel private(%priv : memref<f32>) { ... }
+// }
+//
+// After:
+// memref.global @g_init : memref<f32> = dense<0.0>
+// {acc.declare = #acc.declare<dataClause = acc_copyin>}
+// acc.private.recipe @priv_recipe : memref<f32> init {
+// ^bb0(%arg0: memref<f32>):
+// %alloc = memref.alloc() : memref<f32>
+// %global = memref.get_global @g_init : memref<f32>
+// %val = memref.load %global[] : memref<f32>
+// memref.store %val, %alloc[] : memref<f32>
+// acc.yield %alloc : memref<f32>
+// } destroy { ... }
+// func.func @test() {
+// %var = memref.alloc() : memref<f32>
+// %priv = acc.private varPtr(%var : memref<f32>)
+// recipe(@priv_recipe) -> memref<f32>
+// acc.parallel private(%priv : memref<f32>) { ... }
+// }
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/OpenACC/Transforms/Passes.h"
+
+#include "mlir/Dialect/OpenACC/Analysis/OpenACCSupport.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinAttributes.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/IR/Value.h"
+#include "mlir/Interfaces/FunctionInterfaces.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/TypeSwitch.h"
+
+namespace mlir {
+namespace acc {
+#define GEN_PASS_DEF_ACCIMPLICITDECLARE
+#include "mlir/Dialect/OpenACC/Transforms/Passes.h.inc"
+} // namespace acc
+} // namespace mlir
+
+#define DEBUG_TYPE "acc-implicit-declare"
+
+using namespace mlir;
+
+namespace {
+
+using GlobalOpSetT = llvm::SmallSetVector<Operation *, 16>;
+
+/// Checks whether a use of the requested `globalOp` should be considered
+/// for hoisting out of acc region due to avoid `acc declare`ing something
+/// that instead should be implicitly mapped.
+static bool isGlobalUseCandidateForHoisting(Operation *globalOp,
+ Operation *user,
+ SymbolRefAttr symbol,
+ acc::OpenACCSupport &accSupport) {
+ if (accSupport.isValidSymbolUse(user, symbol)) {
+ // This symbol is valid in GPU region. This means semantics
+ // would change if moved to host - therefore it is not a candidate.
+ return false;
+ }
+
+ bool isConstant = false;
+ bool isFunction = false;
+
+ if (auto globalVarOp =
+ dyn_cast<mlir::acc::GlobalVariableOpInterface>(globalOp)) {
+ isConstant = globalVarOp.isConstant();
+ }
+
+ if (isa<mlir::FunctionOpInterface>(globalOp)) {
+ isFunction = true;
+ }
+
+ // Constants should be kept in device code to ensure they are duplicated.
+ // Function references should be kept in device code to ensure their device
+ // addresses are computed. Everything else should be hoisted since we already
+ // proved they are not valid symbols in GPU region.
+ return !isConstant && !isFunction;
+}
+
+/// Checks whether it is valid to use acc.declare marking on the global.
+bool isValidForAccDeclare(Operation *globalOp) {
+ // For functions - we use acc.routine marking instead.
+ return !isa<mlir::FunctionOpInterface>(globalOp);
+}
+
+/// Checks whether a recipe operation has meaningful use of its symbol that
+/// justifies processing its regions for global references. Returns false if:
+/// 1. The recipe has no symbol uses at all, or
+/// 2. The only symbol use is the recipe's own symbol definition
+template <typename RecipeOpT>
+static bool hasRelevantRecipeUse(RecipeOpT recipeOp) {
+ auto moduleOp = recipeOp->template getParentOfType<mlir::ModuleOp>();
+ std::optional<mlir::SymbolTable::UseRange> symbolUses =
+ recipeOp.getSymbolUses(moduleOp);
+
+ // No recipe symbol uses.
+ if (!symbolUses.has_value() || symbolUses->empty()) {
+ return false;
+ }
+
+ // If more than one use, assume it's used.
+ auto begin = symbolUses->begin();
+ auto end = symbolUses->end();
+ if (begin != end && std::next(begin) != end) {
+ return true;
+ }
+
+ // If single use, check if the use is the recipe itself.
+ const auto &use = *symbolUses->begin();
+ return use.getUser() != recipeOp.getOperation();
+}
+
+// Hoists addr_of operations for non-constant globals out of OpenACC regions.
+// This way - they are implicitly mapped instead of being considered for
+// implicit declare.
+template <typename AccConstructT>
+static void hoistNonConstantDirectUses(AccConstructT accOp,
+ acc::OpenACCSupport &accSupport) {
+ accOp.walk([&](mlir::acc::AddressOfGlobalOpInterface addrOfOp) {
+ SymbolRefAttr symRef = addrOfOp.getSymbol();
+ if (symRef) {
+ Operation *globalOp =
+ SymbolTable::lookupNearestSymbolFrom(addrOfOp, symRef);
+ if (isGlobalUseCandidateForHoisting(globalOp, addrOfOp, symRef,
+ accSupport)) {
+ addrOfOp->moveBefore(accOp);
+ LLVM_DEBUG(
+ llvm::dbgs() << "Hoisted:\n\t" << addrOfOp << "\n\tfrom:\n\t";
+ accOp->print(llvm::dbgs(),
+ OpPrintingFlags{}.skipRegions().enableDebugInfo());
+ llvm::dbgs() << "\n");
+ }
+ }
+ });
+}
+
+// Collects the globals referenced in a device region
+static void collectGlobalsFromDeviceRegion(mlir::Region ®ion,
+ GlobalOpSetT &globals,
+ acc::OpenACCSupport &accSupport) {
+ auto mod = region.getParentOfType<mlir::ModuleOp>();
+ auto symTab = SymbolTable(mod);
+ region.walk([&](Operation *op) {
+ // 1) Only consider relevant operations which use symbols
+ auto addrOfOp = dyn_cast<mlir::acc::AddressOfGlobalOpInterface>(op);
+ if (addrOfOp) {
+ SymbolRefAttr symRef = addrOfOp.getSymbol();
+ // 2) Found an operation which uses the symbol. Next determine if it
+ // is a candidate for `acc declare`. Some of the criteria considered
+ // is whether this symbol is not already a device one (either because
+ // acc declare is already used or this is a CUF global).
+ Operation *globalOp = nullptr;
+ bool isCandidate = !accSupport.isValidSymbolUse(op, symRef, &globalOp);
+ if (isCandidate && globalOp && isValidForAccDeclare(globalOp)) {
+ // 3) Add the candidate to the set of globals to be `acc declare`d.
+ globals.insert(globalOp);
+ }
+ } else if (auto indirectAccessOp =
+ dyn_cast<mlir::acc::IndirectGlobalAccessOpInterface>(op)) {
+ // Process operations that indirectly access globals
+ llvm::SmallVector<SymbolRefAttr> symbols;
+ indirectAccessOp.getReferencedSymbols(symbols, &symTab);
+ for (auto symRef : symbols) {
+ if (Operation *globalOp = SymbolTable::lookupSymbolIn(mod, symRef)) {
+ if (isValidForAccDeclare(globalOp)) {
+ globals.insert(globalOp);
+ }
+ }
+ }
+ }
+ });
+}
+
+// Adds the declare attribute to the operation `op`.
+static void addDeclareAttr(MLIRContext *context, Operation *op,
+ acc::DataClause clause) {
+ if (!op) {
+ return;
+ }
+ op->setAttr(acc::getDeclareAttrName(),
+ acc::DeclareAttr::get(context,
+ acc::DataClauseAttr::get(context, clause)));
+}
+
+// This pass applies implicit declare actions for globals referenced in
+// OpenACC compute and routine regions.
+class ACCImplicitDeclare
+ : public acc::impl::ACCImplicitDeclareBase<ACCImplicitDeclare> {
+public:
+ using ACCImplicitDeclareBase<ACCImplicitDeclare>::ACCImplicitDeclareBase;
+
+ void runOnOperation() override {
+ ModuleOp module = getOperation();
+ auto *context = &getContext();
+ acc::OpenACCSupport &accSupport = getAnalysis<acc::OpenACCSupport>();
+
+ // 1) Start off by hoisting any AddressOf operations out of acc region
+ // for any cases we do not want to `acc declare`. This is because we can
+ // rely on implicit data mapping in majority of cases without uselessly
+ // polluting the device globals.
+ module.walk([&](Operation *op) {
+ TypeSwitch<Operation *, void>(op)
+ .Case<ACC_COMPUTE_CONSTRUCT_OPS, acc::KernelEnvironmentOp>(
+ [&](auto accOp) {
+ hoistNonConstantDirectUses(accOp, accSupport);
+ });
+ });
+
+ // 2) Collect global symbols which need to be `acc declare`d. Do it for
+ // compute regions, acc routine, and existing globals with the declare
+ // attribute.
+ GlobalOpSetT globalsToAccDeclare;
+ module.walk([&](Operation *op) {
+ TypeSwitch<Operation *, void>(op)
+ .Case<ACC_COMPUTE_CONSTRUCT_OPS, acc::KernelEnvironmentOp>(
+ [&](auto accOp) {
+ collectGlobalsFromDeviceRegion(accOp.getRegion(),
+ globalsToAccDeclare, accSupport);
+ })
+ .Case<mlir::FunctionOpInterface>([&](auto func) {
+ if (acc::isAccRoutineOp(func) && !func.isExternal()) {
+ collectGlobalsFromDeviceRegion(func.getFunctionBody(),
+ globalsToAccDeclare, accSupport);
+ }
+ })
+ .Case<mlir::acc::GlobalVariableOpInterface>([&](auto globalVarOp) {
+ if (globalVarOp->getAttr(acc::getDeclareAttrName())) {
+ if (mlir::Region *initRegion = globalVarOp.getInitRegion()) {
+ collectGlobalsFromDeviceRegion(*initRegion, globalsToAccDeclare,
+ accSupport);
+ }
+ }
+ })
+ .Case<acc::PrivateRecipeOp>([&](auto privateRecipe) {
+ if (hasRelevantRecipeUse(privateRecipe)) {
+ collectGlobalsFromDeviceRegion(privateRecipe.getInitRegion(),
+ globalsToAccDeclare, accSupport);
+ collectGlobalsFromDeviceRegion(privateRecipe.getDestroyRegion(),
+ globalsToAccDeclare, accSupport);
+ }
+ })
+ .Case<acc::FirstprivateRecipeOp>([&](auto firstprivateRecipe) {
+ if (hasRelevantRecipeUse(firstprivateRecipe)) {
+ collectGlobalsFromDeviceRegion(firstprivateRecipe.getInitRegion(),
+ globalsToAccDeclare, accSupport);
+ collectGlobalsFromDeviceRegion(
+ firstprivateRecipe.getDestroyRegion(), globalsToAccDeclare,
+ accSupport);
+ collectGlobalsFromDeviceRegion(firstprivateRecipe.getCopyRegion(),
+ globalsToAccDeclare, accSupport);
+ }
+ })
+ .Case<acc::ReductionRecipeOp>([&](auto reductionRecipe) {
+ if (hasRelevantRecipeUse(reductionRecipe)) {
+ collectGlobalsFromDeviceRegion(reductionRecipe.getInitRegion(),
+ globalsToAccDeclare, accSupport);
+ collectGlobalsFromDeviceRegion(
+ reductionRecipe.getCombinerRegion(), globalsToAccDeclare,
+ accSupport);
+ }
+ });
+ });
+
+ // 3) Finally, generate the appropriate declare actions ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/169720
More information about the Mlir-commits
mailing list