[flang-commits] [flang] [acc] Change acc declare_action recipe (PR #157764)
Susan Tan ス-ザン タン via flang-commits
flang-commits at lists.llvm.org
Tue Sep 9 15:51:06 PDT 2025
https://github.com/SusanTan updated https://github.com/llvm/llvm-project/pull/157764
>From 14d9e99a1be08d811ea6c720950c73e4c7bc0f39 Mon Sep 17 00:00:00 2001
From: Susan Tan <zujunt at nvidia.com>
Date: Tue, 9 Sep 2025 15:29:09 -0700
Subject: [PATCH 1/2] change recipe
---
flang/lib/Lower/OpenACC.cpp | 56 +++++++++----------
.../acc-declare-unwrap-defaultbounds.f90 | 14 ++---
flang/test/Lower/OpenACC/acc-declare.f90 | 14 ++---
3 files changed, 40 insertions(+), 44 deletions(-)
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index bbe749f8c8805..8aa40f84c474f 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -244,17 +244,16 @@ static void createDeclareAllocFuncWithArg(mlir::OpBuilder &modBuilder,
if (unwrapFirBox)
asFortranDesc << accFirDescriptorPostfix.str();
- // Updating descriptor must occur before the mapping of the data so that
- // attached data pointer is not overwritten.
- mlir::acc::UpdateDeviceOp updateDeviceOp =
- createDataEntryOp<mlir::acc::UpdateDeviceOp>(
- builder, loc, registerFuncOp.getArgument(0), asFortranDesc, bounds,
- /*structured=*/false, /*implicit=*/true,
- mlir::acc::DataClause::acc_update_device, descTy,
- /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
- llvm::SmallVector<int32_t> operandSegments{0, 0, 0, 1};
- llvm::SmallVector<mlir::Value> operands{updateDeviceOp.getResult()};
- createSimpleOp<mlir::acc::UpdateOp>(builder, loc, operands, operandSegments);
+ // Use declare_enter for the descriptor so the runtime mirrors allocation
+ // semantics instead of issuing an update. The descriptor is mapped by an
+ // unstructured data entry op (structured = false) fed to acc.declare_enter.
+ EntryOp descEntryOp = createDataEntryOp<EntryOp>(
+ builder, loc, registerFuncOp.getArgument(0), asFortranDesc, bounds,
+ /*structured=*/false, /*implicit=*/true, clause, descTy,
+ /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+ mlir::acc::DeclareEnterOp::create(
+ builder, loc, mlir::acc::DeclareTokenType::get(descEntryOp.getContext()),
+ mlir::ValueRange(descEntryOp.getAccVar()));
if (unwrapFirBox) {
mlir::Value desc =
@@ -3989,17 +3988,16 @@ static void createDeclareAllocFunc(mlir::OpBuilder &modBuilder,
asFortranDesc << accFirDescriptorPostfix.str();
llvm::SmallVector<mlir::Value> bounds;
- // Updating descriptor must occur before the mapping of the data so that
- // attached data pointer is not overwritten.
- mlir::acc::UpdateDeviceOp updateDeviceOp =
- createDataEntryOp<mlir::acc::UpdateDeviceOp>(
- builder, loc, addrOp, asFortranDesc, bounds,
- /*structured=*/false, /*implicit=*/true,
- mlir::acc::DataClause::acc_update_device, addrOp.getType(),
- /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
- llvm::SmallVector<int32_t> operandSegments{0, 0, 0, 1};
- llvm::SmallVector<mlir::Value> operands{updateDeviceOp.getResult()};
- createSimpleOp<mlir::acc::UpdateOp>(builder, loc, operands, operandSegments);
+ // Use declare_enter for the descriptor so the runtime mirrors allocation
+ // semantics instead of issuing an update. The descriptor is mapped by an
+ // unstructured data entry op (structured = false) fed to acc.declare_enter.
+ EntryOp descEntryOp = createDataEntryOp<EntryOp>(
+ builder, loc, addrOp, asFortranDesc, bounds,
+ /*structured=*/false, /*implicit=*/true, clause, addrOp.getType(),
+ /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+ mlir::acc::DeclareEnterOp::create(
+ builder, loc, mlir::acc::DeclareTokenType::get(descEntryOp.getContext()),
+ mlir::ValueRange(descEntryOp.getAccVar()));
if (unwrapFirBox) {
auto loadOp = fir::LoadOp::create(builder, loc, addrOp.getResult());
@@ -4092,15 +4090,15 @@ static void createDeclareDeallocFunc(mlir::OpBuilder &modBuilder,
if (unwrapFirBox)
asFortran << accFirDescriptorPostfix.str();
llvm::SmallVector<mlir::Value> bounds;
- mlir::acc::UpdateDeviceOp updateDeviceOp =
- createDataEntryOp<mlir::acc::UpdateDeviceOp>(
+ // Use declare_exit for the descriptor, fed by an unstructured
+ // acc.getdeviceptr (structured = false), instead of issuing an update.
+ mlir::acc::GetDevicePtrOp descEntryOp =
+ createDataEntryOp<mlir::acc::GetDevicePtrOp>(
builder, loc, addrOp, asFortran, bounds,
- /*structured=*/false, /*implicit=*/true,
- mlir::acc::DataClause::acc_update_device, addrOp.getType(),
+ /*structured=*/false, /*implicit=*/true, clause, addrOp.getType(),
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
- llvm::SmallVector<int32_t> operandSegments{0, 0, 0, 1};
- llvm::SmallVector<mlir::Value> operands{updateDeviceOp.getResult()};
- createSimpleOp<mlir::acc::UpdateOp>(builder, loc, operands, operandSegments);
+ mlir::acc::DeclareExitOp::create(builder, loc, mlir::Value{},
+ mlir::ValueRange(descEntryOp.getAccVar()));
modBuilder.setInsertionPointAfter(postDeallocOp);
}
diff --git a/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90 b/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90
index 6869af863644d..f9a8f7bf0469b 100644
--- a/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90
+++ b/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90
@@ -1,8 +1,8 @@
! This test checks lowering of OpenACC declare directive in function and
! subroutine specification parts.
-
! RUN: bbc -fopenacc -emit-hlfir --openacc-unwrap-fir-box=true --openacc-generate-default-bounds=true %s -o - | FileCheck %s
+
module acc_declare
contains
@@ -258,8 +258,6 @@ subroutine acc_declare_allocate()
! CHECK-LABEL: func.func private @_QMacc_declareFacc_declare_allocateEa_acc_declare_update_desc_post_alloc(
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
-! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "a_desc", structured = false}
-! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: %[[LOAD:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] {acc.declare = #acc.declare<dataClause = acc_create>} : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
! CHECK: %[[CREATE:.*]] = acc.create varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> {name = "a", structured = false}
@@ -281,7 +279,7 @@ subroutine acc_declare_allocate()
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
! CHECK: %[[LOAD:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
-! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> {implicit = true, name = "a_desc", structured = false}
+! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>>
! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.heap<!fir.array<?xi32>>)
! CHECK: return
! CHECK: }
@@ -355,8 +353,8 @@ module acc_declare_allocatable_test
! CHECK-LABEL: func.func private @_QMacc_declare_allocatable_testEdata1_acc_declare_update_desc_post_alloc() {
! CHECK: %[[GLOBAL_ADDR:.*]] = fir.address_of(@_QMacc_declare_allocatable_testEdata1) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "data1_desc", structured = false}
-! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: %[[CREATE_DESC:.*]] = acc.create varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "data1_desc", structured = false}
+! CHECK: acc.declare_enter dataOperands(%[[CREATE_DESC]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: %[[LOAD:.*]] = fir.load %[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: %[[BOXADDR:.*]] = fir.box_addr %[[LOAD]] {acc.declare = #acc.declare<dataClause = acc_create>} : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
! CHECK: %[[CREATE:.*]] = acc.create varPtr(%[[BOXADDR]] : !fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> {name = "data1", structured = false}
@@ -376,8 +374,8 @@ module acc_declare_allocatable_test
! CHECK-LABEL: func.func private @_QMacc_declare_allocatable_testEdata1_acc_declare_update_desc_post_dealloc() {
! CHECK: %[[GLOBAL_ADDR:.*]] = fir.address_of(@_QMacc_declare_allocatable_testEdata1) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "data1_desc", structured = false}
-! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {dataClause = #acc<data_clause acc_create>, implicit = true, name = "data1_desc", structured = false}
+! CHECK: acc.declare_exit dataOperands(%[[DEVPTR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: return
! CHECK: }
diff --git a/flang/test/Lower/OpenACC/acc-declare.f90 b/flang/test/Lower/OpenACC/acc-declare.f90
index 4d95ffa10edaf..3b17dee796619 100644
--- a/flang/test/Lower/OpenACC/acc-declare.f90
+++ b/flang/test/Lower/OpenACC/acc-declare.f90
@@ -1,8 +1,8 @@
! This test checks lowering of OpenACC declare directive in function and
! subroutine specification parts.
-
! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
+
module acc_declare
contains
@@ -250,8 +250,8 @@ subroutine acc_declare_allocate()
! CHECK-LABEL: func.func private @_QMacc_declareFacc_declare_allocateEa_acc_declare_update_desc_post_alloc(
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
-! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "a", structured = false}
-! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: %[[CREATE_DESC:.*]] = acc.create varPtr(%[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "a", structured = false}
+! CHECK: acc.declare_enter dataOperands(%[[CREATE_DESC]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: return
! CHECK: }
@@ -330,15 +330,15 @@ module acc_declare_allocatable_test
! CHECK-LABEL: func.func private @_QMacc_declare_allocatable_testEdata1_acc_declare_update_desc_post_alloc() {
! CHECK: %[[GLOBAL_ADDR:.*]] = fir.address_of(@_QMacc_declare_allocatable_testEdata1) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "data1", structured = false}
-! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: %[[CREATE_DESC:.*]] = acc.create varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "data1", structured = false}
+! CHECK: acc.declare_enter dataOperands(%[[CREATE_DESC]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: return
! CHECK: }
! CHECK-LABEL: func.func private @_QMacc_declare_allocatable_testEdata1_acc_declare_update_desc_post_dealloc() {
! CHECK: %[[GLOBAL_ADDR:.*]] = fir.address_of(@_QMacc_declare_allocatable_testEdata1) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "data1", structured = false}
-! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {dataClause = #acc<data_clause acc_create>, implicit = true, name = "data1", structured = false}
+! CHECK: acc.declare_exit dataOperands(%[[DEVPTR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: return
! CHECK: }
>From 153e1f152f4f7a1fffd37cd518cda1cc983ce5f9 Mon Sep 17 00:00:00 2001
From: Susan Tan <zujunt at nvidia.com>
Date: Tue, 9 Sep 2025 15:50:55 -0700
Subject: [PATCH 2/2] add dealloc change
---
flang/lib/Lower/OpenACC.cpp | 14 +++++++-------
.../OpenACC/acc-declare-unwrap-defaultbounds.f90 | 4 ++--
flang/test/Lower/OpenACC/acc-declare.f90 | 4 ++--
3 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 8aa40f84c474f..1e687d6eb788a 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -337,15 +337,15 @@ static void createDeclareDeallocFuncWithArg(
asFortran << accFirDescriptorPostfix.str();
}
- mlir::acc::UpdateDeviceOp updateDeviceOp =
- createDataEntryOp<mlir::acc::UpdateDeviceOp>(
+ // Issue declare_exit for the descriptor or its payload, fed by an
+ // unstructured acc.getdeviceptr, instead of issuing an update.
+ mlir::acc::GetDevicePtrOp postEntryOp =
+ createDataEntryOp<mlir::acc::GetDevicePtrOp>(
builder, loc, var, asFortran, bounds,
- /*structured=*/false, /*implicit=*/true,
- mlir::acc::DataClause::acc_update_device, var.getType(),
+ /*structured=*/false, /*implicit=*/true, clause, var.getType(),
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
- llvm::SmallVector<int32_t> operandSegments{0, 0, 0, 1};
- llvm::SmallVector<mlir::Value> operands{updateDeviceOp.getResult()};
- createSimpleOp<mlir::acc::UpdateOp>(builder, loc, operands, operandSegments);
+ mlir::acc::DeclareExitOp::create(builder, loc, mlir::Value{},
+ mlir::ValueRange(postEntryOp.getAccVar()));
modBuilder.setInsertionPointAfter(postDeallocOp);
builder.restoreInsertionPoint(crtInsPt);
}
diff --git a/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90 b/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90
index f9a8f7bf0469b..8ccaae829dfdf 100644
--- a/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90
+++ b/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90
@@ -279,8 +279,8 @@ subroutine acc_declare_allocate()
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
! CHECK: %[[LOAD:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
-! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>>
-! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.heap<!fir.array<?xi32>>)
+! CHECK: %[[GETDEVICEPTR:.*]] = acc.getdeviceptr varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> {dataClause = #acc<data_clause acc_create>, implicit = true, name = "a_desc", structured = false}
+! CHECK: acc.declare_exit dataOperands(%[[GETDEVICEPTR]] : !fir.heap<!fir.array<?xi32>>)
! CHECK: return
! CHECK: }
diff --git a/flang/test/Lower/OpenACC/acc-declare.f90 b/flang/test/Lower/OpenACC/acc-declare.f90
index 3b17dee796619..5a8ab7ed00fe8 100644
--- a/flang/test/Lower/OpenACC/acc-declare.f90
+++ b/flang/test/Lower/OpenACC/acc-declare.f90
@@ -257,8 +257,8 @@ subroutine acc_declare_allocate()
! CHECK-LABEL: func.func private @_QMacc_declareFacc_declare_allocateEa_acc_declare_update_desc_post_dealloc(
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
-! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "a", structured = false}
-! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {dataClause = #acc<data_clause acc_create>, implicit = true, name = "a", structured = false}
+! CHECK: acc.declare_exit dataOperands(%[[DEVPTR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: return
! CHECK: }
More information about the flang-commits mailing list