[flang-commits] [flang] [flang][OpenACC] add pass to bufferize fir.box recipes (PR #163543)
via flang-commits
flang-commits at lists.llvm.org
Wed Oct 15 03:48:35 PDT 2025
https://github.com/jeanPerier created https://github.com/llvm/llvm-project/pull/163543
When working on privatization, it is easier to work with fir.box explicitly in memory, otherwise, there is no way to express that the fir.box will end-up being a descriptor address in FIR which makes it hard to deal with data management.
However, introducing fir.ref<fir.box> early can pessimize early HLFIR optimization because it is harder to reason about the aliasing of `fir.ref<fir.box>` because of the extra memory indirection.
This patch introduces a pass that turns acc `!fir.box<T>` recipes into `!fir.ref<!fir.box<T>>` recipes and updated the related recipe usages to use `!fir.ref<!fir.box<T>>` (creating new alloca+store+load).
It is added to flang and not OpenACC because it is specific to the `fir.box` type, so it makes little sense to make it an OpenACC generic pass and to create a new OpenACC dialect type interface for this use case.
>From 5874af3617f15c18bd31349312e0e89334d76a5e Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Wed, 15 Oct 2025 03:39:55 -0700
Subject: [PATCH] [flang][OpenACC] add pass to bufferize box recipes
---
flang/include/flang/Optimizer/CMakeLists.txt | 1 +
.../flang/Optimizer/OpenACC/CMakeLists.txt | 4 +
.../include/flang/Optimizer/OpenACC/Passes.h | 33 ++
.../include/flang/Optimizer/OpenACC/Passes.td | 36 ++
flang/lib/Optimizer/OpenACC/CMakeLists.txt | 1 +
.../Transforms/ACCRecipeBufferization.cpp | 191 +++++++++++
.../OpenACC/Transforms/CMakeLists.txt | 11 +
.../Fir/OpenACC/recipe-bufferization.mlir | 316 ++++++++++++++++++
flang/tools/fir-opt/CMakeLists.txt | 1 +
flang/tools/fir-opt/fir-opt.cpp | 2 +
10 files changed, 596 insertions(+)
create mode 100644 flang/include/flang/Optimizer/OpenACC/CMakeLists.txt
create mode 100644 flang/include/flang/Optimizer/OpenACC/Passes.h
create mode 100644 flang/include/flang/Optimizer/OpenACC/Passes.td
create mode 100644 flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp
create mode 100644 flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt
create mode 100644 flang/test/Fir/OpenACC/recipe-bufferization.mlir
diff --git a/flang/include/flang/Optimizer/CMakeLists.txt b/flang/include/flang/Optimizer/CMakeLists.txt
index 3336ac935e101..68af52f1b8dc7 100644
--- a/flang/include/flang/Optimizer/CMakeLists.txt
+++ b/flang/include/flang/Optimizer/CMakeLists.txt
@@ -2,4 +2,5 @@ add_subdirectory(CodeGen)
add_subdirectory(Dialect)
add_subdirectory(HLFIR)
add_subdirectory(Transforms)
+add_subdirectory(OpenACC)
add_subdirectory(OpenMP)
diff --git a/flang/include/flang/Optimizer/OpenACC/CMakeLists.txt b/flang/include/flang/Optimizer/OpenACC/CMakeLists.txt
new file mode 100644
index 0000000000000..a032488569b19
--- /dev/null
+++ b/flang/include/flang/Optimizer/OpenACC/CMakeLists.txt
@@ -0,0 +1,4 @@
+set(LLVM_TARGET_DEFINITIONS Passes.td)
+mlir_tablegen(Passes.h.inc -gen-pass-decls -name FIROpenACC)
+
+add_public_tablegen_target(FIROpenACCPassesIncGen)
diff --git a/flang/include/flang/Optimizer/OpenACC/Passes.h b/flang/include/flang/Optimizer/OpenACC/Passes.h
new file mode 100644
index 0000000000000..0627cc8ce4a6d
--- /dev/null
+++ b/flang/include/flang/Optimizer/OpenACC/Passes.h
@@ -0,0 +1,33 @@
+//===- Passes.h - OpenACC pass entry points -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header declares the OpenACC passes specific to Fortran and FIR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_OPENACC_PASSES_H
+#define FORTRAN_OPTIMIZER_OPENACC_PASSES_H
+
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassRegistry.h"
+
+#include <memory>
+
+namespace fir {
+namespace acc {
+#define GEN_PASS_DECL
+#define GEN_PASS_REGISTRATION
+#include "flang/Optimizer/OpenACC/Passes.h.inc"
+
+std::unique_ptr<mlir::Pass> createACCRecipeBufferizationPass();
+
+} // namespace acc
+} // namespace fir
+
+#endif // FORTRAN_OPTIMIZER_OPENACC_PASSES_H
diff --git a/flang/include/flang/Optimizer/OpenACC/Passes.td b/flang/include/flang/Optimizer/OpenACC/Passes.td
new file mode 100644
index 0000000000000..3c127b30aa9b8
--- /dev/null
+++ b/flang/include/flang/Optimizer/OpenACC/Passes.td
@@ -0,0 +1,36 @@
+//===-- Passes.td - flang OpenACC pass definitions -----------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_OPENACC_PASSES
+#define FORTRAN_OPTIMIZER_OPENACC_PASSES
+
+include "mlir/Pass/PassBase.td"
+
+def ACCRecipeBufferization
+ : Pass<"fir-acc-recipe-bufferization", "mlir::ModuleOp"> {
+ let summary = "Rewrite acc.*.recipe box values to ref<box> and update uses";
+ let description = [{
+ Bufferizes OpenACC recipes that operate on fir.box<T> so their type and
+ region block arguments become fir.ref<fir.box<T>> instead. This applies to
+ acc.private.recipe, acc.firstprivate.recipe (including copy region), and
+ acc.reduction.recipe (including combiner region).
+
+ For affected regions, the pass inserts required loads at the beginning of
+ the region to preserve original uses after argument type changes. For yields
+ of box values, the pass allocates a local fir.ref<fir.box<T>> and stores the
+ yielded fir.box<T> into it so the region yields a reference to a box.
+
+ For acc.private, acc.firstprivate, and acc.reduction operations that use a
+ bufferized recipe, the pass allocates a host-side fir.ref<fir.box<T>> before
+ the data op and rewires the data op to use the new memory. Other users of
+ the original data operation result (outside the paired compute op) are
+ updated to load through the reference.
+ }];
+}
+
+#endif // FORTRAN_OPTIMIZER_OPENACC_PASSES
diff --git a/flang/lib/Optimizer/OpenACC/CMakeLists.txt b/flang/lib/Optimizer/OpenACC/CMakeLists.txt
index fc23e64eeb7a4..790b9fdb1589a 100644
--- a/flang/lib/Optimizer/OpenACC/CMakeLists.txt
+++ b/flang/lib/Optimizer/OpenACC/CMakeLists.txt
@@ -1 +1,2 @@
add_subdirectory(Support)
+add_subdirectory(Transforms)
diff --git a/flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp b/flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp
new file mode 100644
index 0000000000000..4840a999ecd27
--- /dev/null
+++ b/flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp
@@ -0,0 +1,191 @@
+//===- ACCRecipeBufferization.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Bufferize OpenACC recipes that yield fir.box<T> to operate on
+// fir.ref<fir.box<T>> and update uses accordingly.
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/OpenACC/Passes.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/IR/Block.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/SymbolTable.h"
+#include "mlir/IR/Value.h"
+#include "mlir/IR/Visitors.h"
+#include "llvm/ADT/TypeSwitch.h"
+
+namespace fir::acc {
+#define GEN_PASS_DEF_ACCRECIPEBUFFERIZATION
+#include "flang/Optimizer/OpenACC/Passes.h.inc"
+} // namespace fir::acc
+
+namespace {
+
+class BufferizeInterface {
+public:
+ static std::optional<mlir::Type> mustBufferize(mlir::Type recipeType) {
+ if (auto boxTy = llvm::dyn_cast<fir::BaseBoxType>(recipeType))
+ return fir::ReferenceType::get(boxTy);
+ return std::nullopt;
+ }
+
+ static mlir::Operation *load(mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value value) {
+ return builder.create<fir::LoadOp>(loc, value);
+ }
+
+ static mlir::Value placeInMemory(mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value value) {
+ auto alloca = builder.create<fir::AllocaOp>(loc, value.getType());
+ builder.create<fir::StoreOp>(loc, value, alloca);
+ return alloca;
+ }
+};
+
+static void bufferizeRegionArgsAndYields(mlir::Region ®ion,
+ mlir::Location loc, mlir::Type oldType,
+ mlir::Type newType) {
+ if (region.empty())
+ return;
+
+ mlir::OpBuilder builder(®ion);
+ for (mlir::BlockArgument arg : region.getArguments()) {
+ if (arg.getType() == oldType) {
+ arg.setType(newType);
+ if (!arg.use_empty()) {
+ mlir::Operation *loadOp = BufferizeInterface::load(builder, loc, arg);
+ arg.replaceAllUsesExcept(loadOp->getResult(0), loadOp);
+ }
+ }
+ }
+ if (auto yield =
+ llvm::dyn_cast<mlir::acc::YieldOp>(region.back().getTerminator())) {
+ llvm::SmallVector<mlir::Value> newOperands;
+ newOperands.reserve(yield.getNumOperands());
+ bool changed = false;
+ for (mlir::Value oldYieldArg : yield.getOperands()) {
+ if (oldYieldArg.getType() == oldType) {
+ builder.setInsertionPoint(yield);
+ mlir::Value alloca =
+ BufferizeInterface::placeInMemory(builder, loc, oldYieldArg);
+ newOperands.push_back(alloca);
+ changed = true;
+ } else {
+ newOperands.push_back(oldYieldArg);
+ }
+ }
+ if (changed)
+ yield->setOperands(newOperands);
+ }
+}
+
+static void updateRecipeUse(mlir::ArrayAttr recipes, mlir::ValueRange operands,
+ llvm::StringRef recipeSymName,
+ mlir::Operation *computeOp) {
+ if (!recipes)
+ return;
+ for (auto [recipeSym, oldRes] : llvm::zip(recipes, operands)) {
+ if (llvm::cast<mlir::SymbolRefAttr>(recipeSym).getLeafReference() !=
+ recipeSymName)
+ continue;
+
+ mlir::Operation *dataOp = oldRes.getDefiningOp();
+ assert(dataOp && "dataOp must be paired with computeOp");
+ mlir::Location loc = dataOp->getLoc();
+ mlir::OpBuilder builder(dataOp);
+ llvm::TypeSwitch<mlir::Operation *, void>(dataOp)
+ .Case<mlir::acc::PrivateOp, mlir::acc::FirstprivateOp,
+ mlir::acc::ReductionOp>([&](auto privateOp) {
+ builder.setInsertionPointAfterValue(privateOp.getVar());
+ mlir::Value alloca = BufferizeInterface::placeInMemory(
+ builder, loc, privateOp.getVar());
+ privateOp.getVarMutable().assign(alloca);
+ privateOp.getAccVar().setType(alloca.getType());
+ });
+
+ llvm::SmallVector<mlir::Operation *> users(oldRes.getUsers().begin(),
+ oldRes.getUsers().end());
+ for (mlir::Operation *useOp : users) {
+ if (useOp == computeOp)
+ continue;
+ builder.setInsertionPoint(useOp);
+ mlir::Operation *load = BufferizeInterface::load(builder, loc, oldRes);
+ useOp->replaceUsesOfWith(oldRes, load->getResult(0));
+ }
+ }
+}
+
+class ACCRecipeBufferization
+ : public fir::acc::impl::ACCRecipeBufferizationBase<
+ ACCRecipeBufferization> {
+public:
+ void runOnOperation() override {
+ mlir::ModuleOp module = getOperation();
+
+ llvm::SmallVector<llvm::StringRef> recipeNames;
+ module.walk([&](mlir::Operation *recipe) {
+ llvm::TypeSwitch<mlir::Operation *, void>(recipe)
+ .Case<mlir::acc::PrivateRecipeOp, mlir::acc::FirstprivateRecipeOp,
+ mlir::acc::ReductionRecipeOp>([&](auto recipe) {
+ mlir::Type oldType = recipe.getType();
+ auto bufferizedType =
+ BufferizeInterface::mustBufferize(recipe.getType());
+ if (!bufferizedType)
+ return;
+ recipe.setTypeAttr(mlir::TypeAttr::get(*bufferizedType));
+ mlir::Location loc = recipe.getLoc();
+ using RecipeOp = decltype(recipe);
+ bufferizeRegionArgsAndYields(recipe.getInitRegion(), loc, oldType,
+ *bufferizedType);
+ if constexpr (std::is_same_v<RecipeOp,
+ mlir::acc::FirstprivateRecipeOp>)
+ bufferizeRegionArgsAndYields(recipe.getCopyRegion(), loc, oldType,
+ *bufferizedType);
+ if constexpr (std::is_same_v<RecipeOp,
+ mlir::acc::ReductionRecipeOp>)
+ bufferizeRegionArgsAndYields(recipe.getCombinerRegion(), loc,
+ oldType, *bufferizedType);
+ bufferizeRegionArgsAndYields(recipe.getDestroyRegion(), loc,
+ oldType, *bufferizedType);
+ recipeNames.push_back(recipe.getSymName());
+ });
+ });
+ if (recipeNames.empty())
+ return;
+
+ module.walk([&](mlir::Operation *op) {
+ llvm::TypeSwitch<mlir::Operation *, void>(op)
+ .Case<mlir::acc::LoopOp, mlir::acc::ParallelOp, mlir::acc::SerialOp>(
+ [&](auto computeOp) {
+ for (llvm::StringRef recipeName : recipeNames) {
+ if (computeOp.getPrivatizationRecipes())
+ updateRecipeUse(computeOp.getPrivatizationRecipesAttr(),
+ computeOp.getPrivateOperands(), recipeName,
+ op);
+ if (computeOp.getFirstprivatizationRecipes())
+ updateRecipeUse(
+ computeOp.getFirstprivatizationRecipesAttr(),
+ computeOp.getFirstprivateOperands(), recipeName, op);
+ if (computeOp.getReductionRecipes())
+ updateRecipeUse(computeOp.getReductionRecipesAttr(),
+ computeOp.getReductionOperands(),
+ recipeName, op);
+ }
+ });
+ });
+ }
+};
+
+} // namespace
+
+std::unique_ptr<mlir::Pass> fir::acc::createACCRecipeBufferizationPass() {
+ return std::make_unique<ACCRecipeBufferization>();
+}
diff --git a/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt b/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt
new file mode 100644
index 0000000000000..3a2d945f8ac5a
--- /dev/null
+++ b/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt
@@ -0,0 +1,11 @@
+add_flang_library(FIROpenACCTransforms
+ ACCRecipeBufferization.cpp
+
+ DEPENDS
+ FIROpenACCPassesIncGen
+
+ LINK_LIBS
+ MLIRIR
+ FIRDialect
+ MLIROpenACCDialect
+)
diff --git a/flang/test/Fir/OpenACC/recipe-bufferization.mlir b/flang/test/Fir/OpenACC/recipe-bufferization.mlir
new file mode 100644
index 0000000000000..c4f96f63d5076
--- /dev/null
+++ b/flang/test/Fir/OpenACC/recipe-bufferization.mlir
@@ -0,0 +1,316 @@
+// RUN: fir-opt %s --fir-acc-recipe-bufferization -split-input-file | FileCheck %s
+
+// -----
+
+acc.private.recipe @priv_ref_box : !fir.box<i32> init {
+^bb0(%arg0: !fir.box<i32>):
+ %1 = fir.allocmem i32
+ %2 = fir.embox %1 : (!fir.heap<i32>) -> !fir.box<i32>
+ acc.yield %2 : !fir.box<i32>
+} destroy {
+^bb0(%arg0: !fir.box<i32>, %arg1: !fir.box<i32>):
+ %0 = fir.box_addr %arg1 : (!fir.box<i32>) -> !fir.ref<i32>
+ %1 = fir.convert %0 : (!fir.ref<i32>) -> !fir.heap<i32>
+ fir.freemem %1 : !fir.heap<i32>
+ acc.yield
+}
+
+// CHECK-LABEL: acc.private.recipe @priv_ref_box : !fir.ref<!fir.box<i32>> init
+// CHECK: ^bb0(%[[ARG:.*]]: !fir.ref<!fir.box<i32>>)
+// CHECK: %[[EMBOX:.*]] = fir.embox
+// CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.box<i32>
+// CHECK: fir.store %[[EMBOX]] to %[[ALLOCA]] : !fir.ref<!fir.box<i32>>
+// CHECK: acc.yield %[[ALLOCA]] : !fir.ref<!fir.box<i32>>
+// CHECK: } destroy {
+// CHECK: ^bb0(%[[DARG0:.*]]: !fir.ref<!fir.box<i32>>, %[[DARG1:.*]]: !fir.ref<!fir.box<i32>>)
+// CHECK: %[[LD1:.*]] = fir.load %[[DARG1]] : !fir.ref<!fir.box<i32>>
+// CHECK: %[[ADDR:.*]] = fir.box_addr %[[LD1]] : (!fir.box<i32>) -> !fir.ref<i32>
+// CHECK: %[[CVT:.*]] = fir.convert %[[ADDR]] : (!fir.ref<i32>) -> !fir.heap<i32>
+
+// -----
+
+// Test private recipe without destroy region.
+
+acc.private.recipe @priv_ref_box_no_destroy : !fir.box<i32> init {
+^bb0(%arg0: !fir.box<i32>):
+ %1 = fir.alloca i32
+ %2 = fir.embox %1 : (!fir.ref<i32>) -> !fir.box<i32>
+ acc.yield %2 : !fir.box<i32>
+}
+
+// CHECK-LABEL: acc.private.recipe @priv_ref_box_no_destroy : !fir.ref<!fir.box<i32>> init
+// CHECK: ^bb0(%[[ARG:.*]]: !fir.ref<!fir.box<i32>>)
+// CHECK: %[[EMBOX:.*]] = fir.embox
+// CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.box<i32>
+// CHECK: fir.store %[[EMBOX]] to %[[ALLOCA]] : !fir.ref<!fir.box<i32>>
+// CHECK: acc.yield %[[ALLOCA]] : !fir.ref<!fir.box<i32>>
+// CHECK: }
+
+// -----
+
+// Firstprivate recipe with destroy region.
+acc.firstprivate.recipe @fp_ref_box : !fir.box<i32> init {
+^bb0(%arg0: !fir.box<i32>):
+ %0 = fir.allocmem i32
+ %1 = fir.embox %0 : (!fir.heap<i32>) -> !fir.box<i32>
+ acc.yield %1 : !fir.box<i32>
+} copy {
+^bb0(%src: !fir.box<i32>, %dst: !fir.box<i32>):
+ %s_addr = fir.box_addr %src : (!fir.box<i32>) -> !fir.ref<i32>
+ %val = fir.load %s_addr : !fir.ref<i32>
+ %d_addr = fir.box_addr %dst : (!fir.box<i32>) -> !fir.ref<i32>
+ fir.store %val to %d_addr : !fir.ref<i32>
+ acc.yield
+} destroy {
+^bb0(%arg0: !fir.box<i32>, %arg1: !fir.box<i32>):
+ acc.yield
+}
+
+// CHECK-LABEL: acc.firstprivate.recipe @fp_ref_box : !fir.ref<!fir.box<i32>> init
+// CHECK: ^bb0(%[[IARG:.*]]: !fir.ref<!fir.box<i32>>)
+// CHECK: %[[EMBOX_FP:.*]] = fir.embox
+// CHECK: %[[ALLOCA_FP:.*]] = fir.alloca !fir.box<i32>
+// CHECK: fir.store %[[EMBOX_FP]] to %[[ALLOCA_FP]] : !fir.ref<!fir.box<i32>>
+// CHECK: acc.yield %[[ALLOCA_FP]] : !fir.ref<!fir.box<i32>>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<!fir.box<i32>>, %[[DST:.*]]: !fir.ref<!fir.box<i32>>)
+// CHECK: %[[LSRC:.*]] = fir.load %[[SRC]] : !fir.ref<!fir.box<i32>>
+// CHECK: %[[LDST:.*]] = fir.load %[[DST]] : !fir.ref<!fir.box<i32>>
+// CHECK: %[[SADDR:.*]] = fir.box_addr %[[LSRC]] : (!fir.box<i32>) -> !fir.ref<i32>
+// CHECK: %[[VAL:.*]] = fir.load %[[SADDR]] : !fir.ref<i32>
+// CHECK: %[[DADDR:.*]] = fir.box_addr %[[LDST]] : (!fir.box<i32>) -> !fir.ref<i32>
+// CHECK: fir.store %[[VAL]] to %[[DADDR]] : !fir.ref<i32>
+// CHECK: } destroy {
+// CHECK: ^bb0(%[[FDARG0:.*]]: !fir.ref<!fir.box<i32>>, %[[FDARG1:.*]]: !fir.ref<!fir.box<i32>>)
+
+// -----
+
+// Firstprivate recipe without destroy region.
+acc.firstprivate.recipe @fp_ref_box_no_destroy : !fir.box<i32> init {
+^bb0(%arg0: !fir.box<i32>):
+ %0 = fir.alloca i32
+ %1 = fir.embox %0 : (!fir.ref<i32>) -> !fir.box<i32>
+ acc.yield %1 : !fir.box<i32>
+} copy {
+^bb0(%src: !fir.box<i32>, %dst: !fir.box<i32>):
+ %s_addr = fir.box_addr %src : (!fir.box<i32>) -> !fir.ref<i32>
+ %val = fir.load %s_addr : !fir.ref<i32>
+ %d_addr = fir.box_addr %dst : (!fir.box<i32>) -> !fir.ref<i32>
+ fir.store %val to %d_addr : !fir.ref<i32>
+ acc.yield
+}
+
+// CHECK-LABEL: acc.firstprivate.recipe @fp_ref_box_no_destroy : !fir.ref<!fir.box<i32>> init
+// CHECK: ^bb0(%[[IARG2:.*]]: !fir.ref<!fir.box<i32>>)
+// CHECK: %[[EMBOX_FP2:.*]] = fir.embox
+// CHECK: %[[ALLOCA_FP2:.*]] = fir.alloca !fir.box<i32>
+// CHECK: fir.store %[[EMBOX_FP2]] to %[[ALLOCA_FP2]] : !fir.ref<!fir.box<i32>>
+// CHECK: acc.yield %[[ALLOCA_FP2]] : !fir.ref<!fir.box<i32>>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC2:.*]]: !fir.ref<!fir.box<i32>>, %[[DST2:.*]]: !fir.ref<!fir.box<i32>>)
+// CHECK: %[[LSRC2:.*]] = fir.load %[[SRC2]] : !fir.ref<!fir.box<i32>>
+// CHECK: %[[LDST2:.*]] = fir.load %[[DST2]] : !fir.ref<!fir.box<i32>>
+// CHECK: %[[SADDR2:.*]] = fir.box_addr %[[LSRC2]] : (!fir.box<i32>) -> !fir.ref<i32>
+// CHECK: %[[VAL2:.*]] = fir.load %[[SADDR2]] : !fir.ref<i32>
+// CHECK: %[[DADDR2:.*]] = fir.box_addr %[[LDST2]] : (!fir.box<i32>) -> !fir.ref<i32>
+// CHECK: fir.store %[[VAL2]] to %[[DADDR2]] : !fir.ref<i32>
+
+// -----
+
+// Reduction recipe with destroy region.
+acc.reduction.recipe @red_ref_box : !fir.box<i32> reduction_operator <add> init {
+^bb0(%arg0: !fir.box<i32>):
+ %0 = fir.allocmem i32
+ %1 = fir.embox %0 : (!fir.heap<i32>) -> !fir.box<i32>
+ acc.yield %1 : !fir.box<i32>
+} combiner {
+^bb0(%lhs: !fir.box<i32>, %rhs: !fir.box<i32>):
+ %l_addr = fir.box_addr %lhs : (!fir.box<i32>) -> !fir.ref<i32>
+ %l_val = fir.load %l_addr : !fir.ref<i32>
+ %r_addr = fir.box_addr %rhs : (!fir.box<i32>) -> !fir.ref<i32>
+ %r_val = fir.load %r_addr : !fir.ref<i32>
+ %sum = arith.addi %l_val, %r_val : i32
+ %tmp = fir.alloca i32
+ fir.store %sum to %tmp : !fir.ref<i32>
+ %new = fir.embox %tmp : (!fir.ref<i32>) -> !fir.box<i32>
+ acc.yield %new : !fir.box<i32>
+} destroy {
+^bb0(%arg0: !fir.box<i32>, %arg1: !fir.box<i32>):
+ acc.yield
+}
+
+// CHECK-LABEL: acc.reduction.recipe @red_ref_box : !fir.ref<!fir.box<i32>> reduction_operator <add> init
+// CHECK: ^bb0(%[[IARGR:.*]]: !fir.ref<!fir.box<i32>>)
+// CHECK: %[[EMBOXR:.*]] = fir.embox
+// CHECK: %[[ALLOCAR:.*]] = fir.alloca !fir.box<i32>
+// CHECK: fir.store %[[EMBOXR]] to %[[ALLOCAR]] : !fir.ref<!fir.box<i32>>
+// CHECK: acc.yield %[[ALLOCAR]] : !fir.ref<!fir.box<i32>>
+// CHECK: } combiner {
+// CHECK: ^bb0(%[[LHS:.*]]: !fir.ref<!fir.box<i32>>, %[[RHS:.*]]: !fir.ref<!fir.box<i32>>)
+// CHECK: %[[LLHS:.*]] = fir.load %[[LHS]] : !fir.ref<!fir.box<i32>>
+// CHECK: %[[LRHS:.*]] = fir.load %[[RHS]] : !fir.ref<!fir.box<i32>>
+// CHECK: %[[LADDR:.*]] = fir.box_addr %[[LLHS]] : (!fir.box<i32>) -> !fir.ref<i32>
+// CHECK: %[[LVAL:.*]] = fir.load %[[LADDR]] : !fir.ref<i32>
+// CHECK: %[[RADDR:.*]] = fir.box_addr %[[LRHS]] : (!fir.box<i32>) -> !fir.ref<i32>
+// CHECK: %[[RVAL:.*]] = fir.load %[[RADDR]] : !fir.ref<i32>
+// CHECK: %[[SUM:.*]] = arith.addi %[[LVAL]], %[[RVAL]] : i32
+// CHECK: %[[I32ALLOCA:.*]] = fir.alloca i32
+// CHECK: fir.store %[[SUM]] to %[[I32ALLOCA]] : !fir.ref<i32>
+// CHECK: %[[NEWBOX:.*]] = fir.embox %[[I32ALLOCA]] : (!fir.ref<i32>) -> !fir.box<i32>
+// CHECK: %[[BOXALLOCA:.*]] = fir.alloca !fir.box<i32>
+// CHECK: fir.store %[[NEWBOX]] to %[[BOXALLOCA]] : !fir.ref<!fir.box<i32>>
+// CHECK: acc.yield %[[BOXALLOCA]] : !fir.ref<!fir.box<i32>>
+// CHECK: } destroy {
+// CHECK: ^bb0(%[[RD0:.*]]: !fir.ref<!fir.box<i32>>, %[[RD1:.*]]: !fir.ref<!fir.box<i32>>)
+
+// -----
+
+// Reduction recipe without destroy region.
+acc.reduction.recipe @red_ref_box_no_destroy : !fir.box<i32> reduction_operator <add> init {
+^bb0(%arg0: !fir.box<i32>):
+ %0 = fir.alloca i32
+ %1 = fir.embox %0 : (!fir.ref<i32>) -> !fir.box<i32>
+ acc.yield %1 : !fir.box<i32>
+} combiner {
+^bb0(%lhs: !fir.box<i32>, %rhs: !fir.box<i32>):
+ %l_addr = fir.box_addr %lhs : (!fir.box<i32>) -> !fir.ref<i32>
+ %l_val = fir.load %l_addr : !fir.ref<i32>
+ %r_addr = fir.box_addr %rhs : (!fir.box<i32>) -> !fir.ref<i32>
+ %r_val = fir.load %r_addr : !fir.ref<i32>
+ %sum = arith.addi %l_val, %r_val : i32
+ %tmp = fir.alloca i32
+ fir.store %sum to %tmp : !fir.ref<i32>
+ %new = fir.embox %tmp : (!fir.ref<i32>) -> !fir.box<i32>
+ acc.yield %new : !fir.box<i32>
+}
+
+// CHECK-LABEL: acc.reduction.recipe @red_ref_box_no_destroy : !fir.ref<!fir.box<i32>> reduction_operator <add> init
+// CHECK: ^bb0(%[[IARGR2:.*]]: !fir.ref<!fir.box<i32>>)
+// CHECK: %[[EMBOXR2:.*]] = fir.embox
+// CHECK: %[[ALLOCAR2:.*]] = fir.alloca !fir.box<i32>
+// CHECK: fir.store %[[EMBOXR2]] to %[[ALLOCAR2]] : !fir.ref<!fir.box<i32>>
+// CHECK: acc.yield %[[ALLOCAR2]] : !fir.ref<!fir.box<i32>>
+// CHECK: } combiner {
+// CHECK: ^bb0(%[[LHS2:.*]]: !fir.ref<!fir.box<i32>>, %[[RHS2:.*]]: !fir.ref<!fir.box<i32>>)
+// CHECK: %[[LLHS2:.*]] = fir.load %[[LHS2]] : !fir.ref<!fir.box<i32>>
+// CHECK: %[[LRHS2:.*]] = fir.load %[[RHS2]] : !fir.ref<!fir.box<i32>>
+// CHECK: %[[LADDR2:.*]] = fir.box_addr %[[LLHS2]] : (!fir.box<i32>) -> !fir.ref<i32>
+// CHECK: %[[LVAL2:.*]] = fir.load %[[LADDR2]] : !fir.ref<i32>
+// CHECK: %[[RADDR2:.*]] = fir.box_addr %[[LRHS2]] : (!fir.box<i32>) -> !fir.ref<i32>
+// CHECK: %[[RVAL2:.*]] = fir.load %[[RADDR2]] : !fir.ref<i32>
+// CHECK: %[[SUM2:.*]] = arith.addi %[[LVAL2]], %[[RVAL2]] : i32
+// CHECK: %[[I32ALLOCA2:.*]] = fir.alloca i32
+// CHECK: fir.store %[[SUM2]] to %[[I32ALLOCA2]] : !fir.ref<i32>
+// CHECK: %[[NEWBOX2:.*]] = fir.embox %[[I32ALLOCA2]] : (!fir.ref<i32>) -> !fir.box<i32>
+// CHECK: %[[BOXALLOCA2:.*]] = fir.alloca !fir.box<i32>
+// CHECK: fir.store %[[NEWBOX2]] to %[[BOXALLOCA2]] : !fir.ref<!fir.box<i32>>
+// CHECK: acc.yield %[[BOXALLOCA2]] : !fir.ref<!fir.box<i32>>
+
+// -----
+
+// Comprehensive tests that also test recipe usages updates.
+
+acc.private.recipe @privatization_ref_i32 : !fir.ref<i32> init {
+^bb0(%arg0: !fir.ref<i32>):
+ %0 = fir.alloca i32
+ %1 = fir.declare %0 {uniq_name = "acc.private.init"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ acc.yield %1 : !fir.ref<i32>
+}
+acc.private.recipe @privatization_box_Uxf32 : !fir.box<!fir.array<?xf32>> init {
+^bb0(%arg0: !fir.box<!fir.array<?xf32>>):
+ %c0 = arith.constant 0 : index
+ %0:3 = fir.box_dims %arg0, %c0 : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+ %1 = fir.shape %0#1 : (index) -> !fir.shape<1>
+ %2 = fir.allocmem !fir.array<?xf32>, %0#1 {bindc_name = ".tmp", uniq_name = ""}
+ %3 = fir.declare %2(%1) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.heap<!fir.array<?xf32>>
+ %4 = fir.embox %3(%1) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+ acc.yield %4 : !fir.box<!fir.array<?xf32>>
+} destroy {
+^bb0(%arg0: !fir.box<!fir.array<?xf32>>, %arg1: !fir.box<!fir.array<?xf32>>):
+ %0 = fir.box_addr %arg1 : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+ %1 = fir.convert %0 : (!fir.ref<!fir.array<?xf32>>) -> !fir.heap<!fir.array<?xf32>>
+ fir.freemem %1 : !fir.heap<!fir.array<?xf32>>
+ acc.terminator
+}
+func.func @_QPfoo(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x"}) {
+ %c200_i32 = arith.constant 200 : i32
+ %c1_i32 = arith.constant 1 : i32
+ %0 = fir.dummy_scope : !fir.dscope
+ %1 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFfooEi"}
+ %2 = fir.declare %1 {uniq_name = "_QFfooEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ %3 = fir.declare %arg0 dummy_scope %0 {uniq_name = "_QFfooEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+ acc.parallel combined(loop) {
+ %4 = acc.private var(%3 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {name = "x"}
+ %5 = acc.private varPtr(%2 : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
+ acc.loop combined(parallel) private(@privatization_box_Uxf32 -> %4 : !fir.box<!fir.array<?xf32>>, @privatization_ref_i32 -> %5 : !fir.ref<i32>) control(%arg1 : i32) = (%c1_i32 : i32) to (%c200_i32 : i32) step (%c1_i32 : i32) {
+ %6 = fir.dummy_scope : !fir.dscope
+ %7 = fir.declare %4 dummy_scope %6 {uniq_name = "_QFfooEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+ %8 = fir.declare %5 {uniq_name = "_QFfooEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ %9 = fir.convert %arg1 : (i32) -> f32
+ %10 = fir.convert %arg1 : (i32) -> i64
+ %11 = fir.array_coor %7 %10 : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+ fir.store %9 to %11 : !fir.ref<f32>
+ acc.yield
+ } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+ acc.yield
+ }
+ return
+}
+
+// CHECK-LABEL: acc.private.recipe @privatization_ref_i32 : !fir.ref<i32> init {
+// CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+// CHECK: %[[VAL_1:.*]] = fir.alloca i32
+// CHECK: %[[VAL_2:.*]] = fir.declare %[[VAL_1]] {uniq_name = "acc.private.init"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK: acc.yield %[[VAL_2]] : !fir.ref<i32>
+// CHECK: }
+
+// CHECK-LABEL: acc.private.recipe @privatization_box_Uxf32 : !fir.ref<!fir.box<!fir.array<?xf32>>> init {
+// CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<?xf32>>>):
+// CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
+// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_3:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_2]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+// CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]]#1 : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_5:.*]] = fir.allocmem !fir.array<?xf32>, %[[VAL_3]]#1 {bindc_name = ".tmp", uniq_name = ""}
+// CHECK: %[[VAL_6:.*]] = fir.declare %[[VAL_5]](%[[VAL_4]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.heap<!fir.array<?xf32>>
+// CHECK: %[[VAL_7:.*]] = fir.embox %[[VAL_6]](%[[VAL_4]]) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+// CHECK: %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<?xf32>>
+// CHECK: fir.store %[[VAL_7]] to %[[VAL_8]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
+// CHECK: acc.yield %[[VAL_8]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
+
+// CHECK-LABEL: } destroy {
+// CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<?xf32>>>, %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.array<?xf32>>>):
+// CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
+// CHECK: %[[VAL_3:.*]] = fir.box_addr %[[VAL_2]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+// CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (!fir.ref<!fir.array<?xf32>>) -> !fir.heap<!fir.array<?xf32>>
+// CHECK: fir.freemem %[[VAL_4]] : !fir.heap<!fir.array<?xf32>>
+// CHECK: acc.terminator
+// CHECK: }
+
+// CHECK-LABEL: func.func @_QPfoo(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x"}) {
+// CHECK: %[[VAL_0:.*]] = arith.constant 200 : i32
+// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i32
+// CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFfooEi"}
+// CHECK: %[[VAL_4:.*]] = fir.declare %[[VAL_3]] {uniq_name = "_QFfooEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK: %[[VAL_5:.*]] = fir.declare %[[ARG0]] dummy_scope %[[VAL_2]] {uniq_name = "_QFfooEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+// CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box<!fir.array<?xf32>>
+// CHECK: fir.store %[[VAL_5]] to %[[VAL_6]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
+// CHECK: acc.parallel combined(loop) {
+// CHECK: %[[VAL_7:.*]] = acc.private varPtr(%[[VAL_6]] : !fir.ref<!fir.box<!fir.array<?xf32>>>) -> !fir.ref<!fir.box<!fir.array<?xf32>>> {name = "x"}
+// CHECK: %[[VAL_8:.*]] = acc.private varPtr(%[[VAL_4]] : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
+// CHECK: acc.loop combined(parallel) private(@privatization_box_Uxf32 -> %[[VAL_7]] : !fir.ref<!fir.box<!fir.array<?xf32>>>, @privatization_ref_i32 -> %[[VAL_8]] : !fir.ref<i32>) control(%[[VAL_9:.*]] : i32) = (%[[VAL_1]] : i32) to (%[[VAL_0]] : i32) step (%[[VAL_1]] : i32) {
+// CHECK: %[[VAL_10:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_7]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
+// CHECK: %[[VAL_12:.*]] = fir.declare %[[VAL_11]] dummy_scope %[[VAL_10]] {uniq_name = "_QFfooEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+// CHECK: %[[VAL_13:.*]] = fir.declare %[[VAL_8]] {uniq_name = "_QFfooEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_9]] : (i32) -> f32
+// CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_9]] : (i32) -> i64
+// CHECK: %[[VAL_16:.*]] = fir.array_coor %[[VAL_12]] %[[VAL_15]] : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+// CHECK: fir.store %[[VAL_14]] to %[[VAL_16]] : !fir.ref<f32>
+// CHECK: acc.yield
+// CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+// CHECK: acc.yield
+// CHECK: }
+// CHECK: return
+// CHECK: }
diff --git a/flang/tools/fir-opt/CMakeLists.txt b/flang/tools/fir-opt/CMakeLists.txt
index 4ee9752727b87..c5bd4390a4b78 100644
--- a/flang/tools/fir-opt/CMakeLists.txt
+++ b/flang/tools/fir-opt/CMakeLists.txt
@@ -22,6 +22,7 @@ target_link_libraries(fir-opt PRIVATE
HLFIRDialect
HLFIRTransforms
FIROpenACCSupport
+ FIROpenACCTransforms
FIROpenMPSupport
FlangOpenMPTransforms
FIRAnalysis
diff --git a/flang/tools/fir-opt/fir-opt.cpp b/flang/tools/fir-opt/fir-opt.cpp
index d66fc3f08bdf8..b0b277b88dbe2 100644
--- a/flang/tools/fir-opt/fir-opt.cpp
+++ b/flang/tools/fir-opt/fir-opt.cpp
@@ -14,6 +14,7 @@
#include "mlir/Tools/mlir-opt/MlirOptMain.h"
#include "flang/Optimizer/CodeGen/CodeGen.h"
#include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenACC/Passes.h"
#include "flang/Optimizer/OpenMP/Passes.h"
#include "flang/Optimizer/Support/InitFIR.h"
#include "flang/Optimizer/Transforms/Passes.h"
@@ -37,6 +38,7 @@ int main(int argc, char **argv) {
fir::registerOptTransformPasses();
hlfir::registerHLFIRPasses();
flangomp::registerFlangOpenMPPasses();
+ fir::acc::registerFIROpenACCPasses();
#ifdef FLANG_INCLUDE_TESTS
fir::test::registerTestFIRAliasAnalysisPass();
fir::test::registerTestFIROpenACCInterfacesPass();
More information about the flang-commits
mailing list