[flang-commits] [flang] [acc] Change acc declare_action recipe (PR #157764)
Susan Tan ス-ザン タン via flang-commits
flang-commits at lists.llvm.org
Tue Sep 9 16:50:27 PDT 2025
https://github.com/SusanTan updated https://github.com/llvm/llvm-project/pull/157764
>From 14d9e99a1be08d811ea6c720950c73e4c7bc0f39 Mon Sep 17 00:00:00 2001
From: Susan Tan <zujunt at nvidia.com>
Date: Tue, 9 Sep 2025 15:29:09 -0700
Subject: [PATCH 1/3] change recipe
---
flang/lib/Lower/OpenACC.cpp | 56 +++++++++----------
.../acc-declare-unwrap-defaultbounds.f90 | 14 ++---
flang/test/Lower/OpenACC/acc-declare.f90 | 14 ++---
3 files changed, 40 insertions(+), 44 deletions(-)
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index bbe749f8c8805..8aa40f84c474f 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -244,17 +244,16 @@ static void createDeclareAllocFuncWithArg(mlir::OpBuilder &modBuilder,
if (unwrapFirBox)
asFortranDesc << accFirDescriptorPostfix.str();
- // Updating descriptor must occur before the mapping of the data so that
- // attached data pointer is not overwritten.
- mlir::acc::UpdateDeviceOp updateDeviceOp =
- createDataEntryOp<mlir::acc::UpdateDeviceOp>(
- builder, loc, registerFuncOp.getArgument(0), asFortranDesc, bounds,
- /*structured=*/false, /*implicit=*/true,
- mlir::acc::DataClause::acc_update_device, descTy,
- /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
- llvm::SmallVector<int32_t> operandSegments{0, 0, 0, 1};
- llvm::SmallVector<mlir::Value> operands{updateDeviceOp.getResult()};
- createSimpleOp<mlir::acc::UpdateOp>(builder, loc, operands, operandSegments);
+ // Use declare_enter for the descriptor so the runtime mirrors allocation
+ // semantics instead of issuing an update. This ensures the descriptor's
+ // device-side metadata is established via a structured begin.
+ EntryOp descEntryOp = createDataEntryOp<EntryOp>(
+ builder, loc, registerFuncOp.getArgument(0), asFortranDesc, bounds,
+ /*structured=*/false, /*implicit=*/true, clause, descTy,
+ /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+ mlir::acc::DeclareEnterOp::create(
+ builder, loc, mlir::acc::DeclareTokenType::get(descEntryOp.getContext()),
+ mlir::ValueRange(descEntryOp.getAccVar()));
if (unwrapFirBox) {
mlir::Value desc =
@@ -3989,17 +3988,16 @@ static void createDeclareAllocFunc(mlir::OpBuilder &modBuilder,
asFortranDesc << accFirDescriptorPostfix.str();
llvm::SmallVector<mlir::Value> bounds;
- // Updating descriptor must occur before the mapping of the data so that
- // attached data pointer is not overwritten.
- mlir::acc::UpdateDeviceOp updateDeviceOp =
- createDataEntryOp<mlir::acc::UpdateDeviceOp>(
- builder, loc, addrOp, asFortranDesc, bounds,
- /*structured=*/false, /*implicit=*/true,
- mlir::acc::DataClause::acc_update_device, addrOp.getType(),
- /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
- llvm::SmallVector<int32_t> operandSegments{0, 0, 0, 1};
- llvm::SmallVector<mlir::Value> operands{updateDeviceOp.getResult()};
- createSimpleOp<mlir::acc::UpdateOp>(builder, loc, operands, operandSegments);
+ // Use declare_enter for the descriptor so the runtime mirrors allocation
+ // semantics instead of issuing an update. This ensures the descriptor's
+ // device-side metadata is established via a structured begin.
+ EntryOp descEntryOp = createDataEntryOp<EntryOp>(
+ builder, loc, addrOp, asFortranDesc, bounds,
+ /*structured=*/false, /*implicit=*/true, clause, addrOp.getType(),
+ /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+ mlir::acc::DeclareEnterOp::create(
+ builder, loc, mlir::acc::DeclareTokenType::get(descEntryOp.getContext()),
+ mlir::ValueRange(descEntryOp.getAccVar()));
if (unwrapFirBox) {
auto loadOp = fir::LoadOp::create(builder, loc, addrOp.getResult());
@@ -4092,15 +4090,15 @@ static void createDeclareDeallocFunc(mlir::OpBuilder &modBuilder,
if (unwrapFirBox)
asFortran << accFirDescriptorPostfix.str();
llvm::SmallVector<mlir::Value> bounds;
- mlir::acc::UpdateDeviceOp updateDeviceOp =
- createDataEntryOp<mlir::acc::UpdateDeviceOp>(
+ // Use declare_exit for the descriptor to end the structured declare region
+ // instead of issuing an update.
+ mlir::acc::GetDevicePtrOp descEntryOp =
+ createDataEntryOp<mlir::acc::GetDevicePtrOp>(
builder, loc, addrOp, asFortran, bounds,
- /*structured=*/false, /*implicit=*/true,
- mlir::acc::DataClause::acc_update_device, addrOp.getType(),
+ /*structured=*/false, /*implicit=*/true, clause, addrOp.getType(),
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
- llvm::SmallVector<int32_t> operandSegments{0, 0, 0, 1};
- llvm::SmallVector<mlir::Value> operands{updateDeviceOp.getResult()};
- createSimpleOp<mlir::acc::UpdateOp>(builder, loc, operands, operandSegments);
+ mlir::acc::DeclareExitOp::create(builder, loc, mlir::Value{},
+ mlir::ValueRange(descEntryOp.getAccVar()));
modBuilder.setInsertionPointAfter(postDeallocOp);
}
diff --git a/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90 b/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90
index 6869af863644d..f9a8f7bf0469b 100644
--- a/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90
+++ b/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90
@@ -1,8 +1,8 @@
! This test checks lowering of OpenACC declare directive in function and
! subroutine specification parts.
-
! RUN: bbc -fopenacc -emit-hlfir --openacc-unwrap-fir-box=true --openacc-generate-default-bounds=true %s -o - | FileCheck %s
+
module acc_declare
contains
@@ -258,8 +258,6 @@ subroutine acc_declare_allocate()
! CHECK-LABEL: func.func private @_QMacc_declareFacc_declare_allocateEa_acc_declare_update_desc_post_alloc(
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
-! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "a_desc", structured = false}
-! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: %[[LOAD:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] {acc.declare = #acc.declare<dataClause = acc_create>} : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
! CHECK: %[[CREATE:.*]] = acc.create varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> {name = "a", structured = false}
@@ -281,7 +279,7 @@ subroutine acc_declare_allocate()
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
! CHECK: %[[LOAD:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
-! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> {implicit = true, name = "a_desc", structured = false}
+! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>>
! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.heap<!fir.array<?xi32>>)
! CHECK: return
! CHECK: }
@@ -355,8 +353,8 @@ module acc_declare_allocatable_test
! CHECK-LABEL: func.func private @_QMacc_declare_allocatable_testEdata1_acc_declare_update_desc_post_alloc() {
! CHECK: %[[GLOBAL_ADDR:.*]] = fir.address_of(@_QMacc_declare_allocatable_testEdata1) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "data1_desc", structured = false}
-! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: %[[CREATE_DESC:.*]] = acc.create varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "data1_desc", structured = false}
+! CHECK: acc.declare_enter dataOperands(%[[CREATE_DESC]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: %[[LOAD:.*]] = fir.load %[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: %[[BOXADDR:.*]] = fir.box_addr %[[LOAD]] {acc.declare = #acc.declare<dataClause = acc_create>} : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
! CHECK: %[[CREATE:.*]] = acc.create varPtr(%[[BOXADDR]] : !fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> {name = "data1", structured = false}
@@ -376,8 +374,8 @@ module acc_declare_allocatable_test
! CHECK-LABEL: func.func private @_QMacc_declare_allocatable_testEdata1_acc_declare_update_desc_post_dealloc() {
! CHECK: %[[GLOBAL_ADDR:.*]] = fir.address_of(@_QMacc_declare_allocatable_testEdata1) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "data1_desc", structured = false}
-! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {dataClause = #acc<data_clause acc_create>, implicit = true, name = "data1_desc", structured = false}
+! CHECK: acc.declare_exit dataOperands(%[[DEVPTR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: return
! CHECK: }
diff --git a/flang/test/Lower/OpenACC/acc-declare.f90 b/flang/test/Lower/OpenACC/acc-declare.f90
index 4d95ffa10edaf..3b17dee796619 100644
--- a/flang/test/Lower/OpenACC/acc-declare.f90
+++ b/flang/test/Lower/OpenACC/acc-declare.f90
@@ -1,8 +1,8 @@
! This test checks lowering of OpenACC declare directive in function and
! subroutine specification parts.
-
! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
+
module acc_declare
contains
@@ -250,8 +250,8 @@ subroutine acc_declare_allocate()
! CHECK-LABEL: func.func private @_QMacc_declareFacc_declare_allocateEa_acc_declare_update_desc_post_alloc(
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
-! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "a", structured = false}
-! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: %[[CREATE_DESC:.*]] = acc.create varPtr(%[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "a", structured = false}
+! CHECK: acc.declare_enter dataOperands(%[[CREATE_DESC]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: return
! CHECK: }
@@ -330,15 +330,15 @@ module acc_declare_allocatable_test
! CHECK-LABEL: func.func private @_QMacc_declare_allocatable_testEdata1_acc_declare_update_desc_post_alloc() {
! CHECK: %[[GLOBAL_ADDR:.*]] = fir.address_of(@_QMacc_declare_allocatable_testEdata1) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "data1", structured = false}
-! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: %[[CREATE_DESC:.*]] = acc.create varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "data1", structured = false}
+! CHECK: acc.declare_enter dataOperands(%[[CREATE_DESC]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: return
! CHECK: }
! CHECK-LABEL: func.func private @_QMacc_declare_allocatable_testEdata1_acc_declare_update_desc_post_dealloc() {
! CHECK: %[[GLOBAL_ADDR:.*]] = fir.address_of(@_QMacc_declare_allocatable_testEdata1) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "data1", structured = false}
-! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {dataClause = #acc<data_clause acc_create>, implicit = true, name = "data1", structured = false}
+! CHECK: acc.declare_exit dataOperands(%[[DEVPTR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: return
! CHECK: }
>From 153e1f152f4f7a1fffd37cd518cda1cc983ce5f9 Mon Sep 17 00:00:00 2001
From: Susan Tan <zujunt at nvidia.com>
Date: Tue, 9 Sep 2025 15:50:55 -0700
Subject: [PATCH 2/3] add dealloc change
---
flang/lib/Lower/OpenACC.cpp | 14 +++++++-------
.../OpenACC/acc-declare-unwrap-defaultbounds.f90 | 4 ++--
flang/test/Lower/OpenACC/acc-declare.f90 | 4 ++--
3 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 8aa40f84c474f..1e687d6eb788a 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -337,15 +337,15 @@ static void createDeclareDeallocFuncWithArg(
asFortran << accFirDescriptorPostfix.str();
}
- mlir::acc::UpdateDeviceOp updateDeviceOp =
- createDataEntryOp<mlir::acc::UpdateDeviceOp>(
+ // End the structured declare region for the descriptor or its payload
+ // using declare_exit instead of issuing an update.
+ mlir::acc::GetDevicePtrOp postEntryOp =
+ createDataEntryOp<mlir::acc::GetDevicePtrOp>(
builder, loc, var, asFortran, bounds,
- /*structured=*/false, /*implicit=*/true,
- mlir::acc::DataClause::acc_update_device, var.getType(),
+ /*structured=*/false, /*implicit=*/true, clause, var.getType(),
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
- llvm::SmallVector<int32_t> operandSegments{0, 0, 0, 1};
- llvm::SmallVector<mlir::Value> operands{updateDeviceOp.getResult()};
- createSimpleOp<mlir::acc::UpdateOp>(builder, loc, operands, operandSegments);
+ mlir::acc::DeclareExitOp::create(builder, loc, mlir::Value{},
+ mlir::ValueRange(postEntryOp.getAccVar()));
modBuilder.setInsertionPointAfter(postDeallocOp);
builder.restoreInsertionPoint(crtInsPt);
}
diff --git a/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90 b/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90
index f9a8f7bf0469b..8ccaae829dfdf 100644
--- a/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90
+++ b/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90
@@ -279,8 +279,8 @@ subroutine acc_declare_allocate()
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
! CHECK: %[[LOAD:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
-! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>>
-! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.heap<!fir.array<?xi32>>)
+! CHECK: %[[GETDEVICEPTR:.*]] = acc.getdeviceptr varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> {dataClause = #acc<data_clause acc_create>, implicit = true, name = "a_desc", structured = false}
+! CHECK: acc.declare_exit dataOperands(%[[GETDEVICEPTR]] : !fir.heap<!fir.array<?xi32>>)
! CHECK: return
! CHECK: }
diff --git a/flang/test/Lower/OpenACC/acc-declare.f90 b/flang/test/Lower/OpenACC/acc-declare.f90
index 3b17dee796619..5a8ab7ed00fe8 100644
--- a/flang/test/Lower/OpenACC/acc-declare.f90
+++ b/flang/test/Lower/OpenACC/acc-declare.f90
@@ -257,8 +257,8 @@ subroutine acc_declare_allocate()
! CHECK-LABEL: func.func private @_QMacc_declareFacc_declare_allocateEa_acc_declare_update_desc_post_dealloc(
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
-! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "a", structured = false}
-! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {dataClause = #acc<data_clause acc_create>, implicit = true, name = "a", structured = false}
+! CHECK: acc.declare_exit dataOperands(%[[DEVPTR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: return
! CHECK: }
>From 46c9a8ce0f3157e9cb3bccb60d2196bc354c8a07 Mon Sep 17 00:00:00 2001
From: Susan Tan <zujunt at nvidia.com>
Date: Tue, 9 Sep 2025 16:50:16 -0700
Subject: [PATCH 3/3] preserve unwrap
---
flang/lib/Lower/OpenACC.cpp | 202 ++++++++++++------
.../acc-declare-unwrap-defaultbounds.f90 | 18 +-
2 files changed, 150 insertions(+), 70 deletions(-)
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 1e687d6eb788a..1fbd1b31a8db7 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -244,16 +244,29 @@ static void createDeclareAllocFuncWithArg(mlir::OpBuilder &modBuilder,
if (unwrapFirBox)
asFortranDesc << accFirDescriptorPostfix.str();
- // Use declare_enter for the descriptor so the runtime mirrors allocation
- // semantics instead of issuing an update. This ensures the descriptor's
- // device-side metadata is established via a structured begin.
- EntryOp descEntryOp = createDataEntryOp<EntryOp>(
- builder, loc, registerFuncOp.getArgument(0), asFortranDesc, bounds,
- /*structured=*/false, /*implicit=*/true, clause, descTy,
- /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
- mlir::acc::DeclareEnterOp::create(
- builder, loc, mlir::acc::DeclareTokenType::get(descEntryOp.getContext()),
- mlir::ValueRange(descEntryOp.getAccVar()));
+ // For descriptor, preserve old behavior when unwrapping FIR box: update.
+ if (unwrapFirBox) {
+ mlir::acc::UpdateDeviceOp updateDeviceOp =
+ createDataEntryOp<mlir::acc::UpdateDeviceOp>(
+ builder, loc, registerFuncOp.getArgument(0), asFortranDesc, bounds,
+ /*structured=*/false, /*implicit=*/true,
+ mlir::acc::DataClause::acc_update_device, descTy,
+ /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+ llvm::SmallVector<int32_t> operandSegments{0, 0, 0, 1};
+ llvm::SmallVector<mlir::Value> operands{updateDeviceOp.getResult()};
+ createSimpleOp<mlir::acc::UpdateOp>(builder, loc, operands,
+ operandSegments);
+ } else {
+ // New behavior: start a structured region with declare_enter.
+ EntryOp descEntryOp = createDataEntryOp<EntryOp>(
+ builder, loc, registerFuncOp.getArgument(0), asFortranDesc, bounds,
+ /*structured=*/false, /*implicit=*/true, clause, descTy,
+ /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+ mlir::acc::DeclareEnterOp::create(
+ builder, loc,
+ mlir::acc::DeclareTokenType::get(descEntryOp.getContext()),
+ mlir::ValueRange(descEntryOp.getAccVar()));
+ }
if (unwrapFirBox) {
mlir::Value desc =
@@ -298,30 +311,58 @@ static void createDeclareDeallocFuncWithArg(
}
llvm::SmallVector<mlir::Value> bounds;
- mlir::acc::GetDevicePtrOp entryOp =
- createDataEntryOp<mlir::acc::GetDevicePtrOp>(
- builder, loc, var, asFortran, bounds,
- /*structured=*/false, /*implicit=*/false, clause, var.getType(),
- /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
- mlir::acc::DeclareExitOp::create(builder, loc, mlir::Value{},
- mlir::ValueRange(entryOp.getAccVar()));
-
- if constexpr (std::is_same_v<ExitOp, mlir::acc::CopyoutOp> ||
- std::is_same_v<ExitOp, mlir::acc::UpdateHostOp>)
- ExitOp::create(builder, entryOp.getLoc(), entryOp.getAccVar(),
- entryOp.getVar(), entryOp.getVarType(), entryOp.getBounds(),
- entryOp.getAsyncOperands(),
- entryOp.getAsyncOperandsDeviceTypeAttr(),
- entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(),
- /*structured=*/false, /*implicit=*/false,
- builder.getStringAttr(*entryOp.getName()));
- else
- ExitOp::create(builder, entryOp.getLoc(), entryOp.getAccVar(),
- entryOp.getBounds(), entryOp.getAsyncOperands(),
- entryOp.getAsyncOperandsDeviceTypeAttr(),
- entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(),
- /*structured=*/false, /*implicit=*/false,
- builder.getStringAttr(*entryOp.getName()));
+ if (unwrapFirBox) {
+ // Unwrap: delete device payload using getdeviceptr + declare_exit + ExitOp
+ mlir::acc::GetDevicePtrOp entryOp =
+ createDataEntryOp<mlir::acc::GetDevicePtrOp>(
+ builder, loc, var, asFortran, bounds,
+ /*structured=*/false, /*implicit=*/false, clause, var.getType(),
+ /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+ mlir::acc::DeclareExitOp::create(builder, loc, mlir::Value{},
+ mlir::ValueRange(entryOp.getAccVar()));
+
+ if constexpr (std::is_same_v<ExitOp, mlir::acc::CopyoutOp> ||
+ std::is_same_v<ExitOp, mlir::acc::UpdateHostOp>)
+ ExitOp::create(builder, entryOp.getLoc(), entryOp.getAccVar(),
+ entryOp.getVar(), entryOp.getVarType(),
+ entryOp.getBounds(), entryOp.getAsyncOperands(),
+ entryOp.getAsyncOperandsDeviceTypeAttr(),
+ entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(),
+ /*structured=*/false, /*implicit=*/false,
+ builder.getStringAttr(*entryOp.getName()));
+ else
+ ExitOp::create(builder, entryOp.getLoc(), entryOp.getAccVar(),
+ entryOp.getBounds(), entryOp.getAsyncOperands(),
+ entryOp.getAsyncOperandsDeviceTypeAttr(),
+ entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(),
+ /*structured=*/false, /*implicit=*/false,
+ builder.getStringAttr(*entryOp.getName()));
+ } else {
+ mlir::acc::GetDevicePtrOp entryOp =
+ createDataEntryOp<mlir::acc::GetDevicePtrOp>(
+ builder, loc, var, asFortran, bounds,
+ /*structured=*/false, /*implicit=*/false, clause, var.getType(),
+ /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+ mlir::acc::DeclareExitOp::create(builder, loc, mlir::Value{},
+ mlir::ValueRange(entryOp.getAccVar()));
+
+ if constexpr (std::is_same_v<ExitOp, mlir::acc::CopyoutOp> ||
+ std::is_same_v<ExitOp, mlir::acc::UpdateHostOp>)
+ ExitOp::create(builder, entryOp.getLoc(), entryOp.getAccVar(),
+ entryOp.getVar(), entryOp.getVarType(),
+ entryOp.getBounds(), entryOp.getAsyncOperands(),
+ entryOp.getAsyncOperandsDeviceTypeAttr(),
+ entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(),
+ /*structured=*/false, /*implicit=*/false,
+ builder.getStringAttr(*entryOp.getName()));
+ else
+ ExitOp::create(builder, entryOp.getLoc(), entryOp.getAccVar(),
+ entryOp.getBounds(), entryOp.getAsyncOperands(),
+ entryOp.getAsyncOperandsDeviceTypeAttr(),
+ entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(),
+ /*structured=*/false, /*implicit=*/false,
+ builder.getStringAttr(*entryOp.getName()));
+ }
// Generate the post dealloc function.
modBuilder.setInsertionPointAfter(preDeallocOp);
@@ -337,15 +378,28 @@ static void createDeclareDeallocFuncWithArg(
asFortran << accFirDescriptorPostfix.str();
}
- // End the structured declare region for the descriptor or its payload
- // using declare_exit instead of issuing an update.
- mlir::acc::GetDevicePtrOp postEntryOp =
- createDataEntryOp<mlir::acc::GetDevicePtrOp>(
- builder, loc, var, asFortran, bounds,
- /*structured=*/false, /*implicit=*/true, clause, var.getType(),
- /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
- mlir::acc::DeclareExitOp::create(builder, loc, mlir::Value{},
- mlir::ValueRange(postEntryOp.getAccVar()));
+ if (unwrapFirBox) {
+ // Old behavior: update descriptor after deallocation.
+ mlir::acc::UpdateDeviceOp updateDeviceOp =
+ createDataEntryOp<mlir::acc::UpdateDeviceOp>(
+ builder, loc, var, asFortran, bounds,
+ /*structured=*/false, /*implicit=*/true,
+ mlir::acc::DataClause::acc_update_device, var.getType(),
+ /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+ llvm::SmallVector<int32_t> operandSegments{0, 0, 0, 1};
+ llvm::SmallVector<mlir::Value> operands{updateDeviceOp.getResult()};
+ createSimpleOp<mlir::acc::UpdateOp>(builder, loc, operands,
+ operandSegments);
+ } else {
+ // New behavior: end structured region with declare_exit.
+ mlir::acc::GetDevicePtrOp postEntryOp =
+ createDataEntryOp<mlir::acc::GetDevicePtrOp>(
+ builder, loc, var, asFortran, bounds,
+ /*structured=*/false, /*implicit=*/true, clause, var.getType(),
+ /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+ mlir::acc::DeclareExitOp::create(builder, loc, mlir::Value{},
+ mlir::ValueRange(postEntryOp.getAccVar()));
+ }
modBuilder.setInsertionPointAfter(postDeallocOp);
builder.restoreInsertionPoint(crtInsPt);
}
@@ -3988,16 +4042,28 @@ static void createDeclareAllocFunc(mlir::OpBuilder &modBuilder,
asFortranDesc << accFirDescriptorPostfix.str();
llvm::SmallVector<mlir::Value> bounds;
- // Use declare_enter for the descriptor so the runtime mirrors allocation
- // semantics instead of issuing an update. This ensures the descriptor's
- // device-side metadata is established via a structured begin.
- EntryOp descEntryOp = createDataEntryOp<EntryOp>(
- builder, loc, addrOp, asFortranDesc, bounds,
- /*structured=*/false, /*implicit=*/true, clause, addrOp.getType(),
- /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
- mlir::acc::DeclareEnterOp::create(
- builder, loc, mlir::acc::DeclareTokenType::get(descEntryOp.getContext()),
- mlir::ValueRange(descEntryOp.getAccVar()));
+ // For unwrapFirBox=false this remains declare_enter; for unwrapFirBox=true,
+ // the descriptor post-alloc remains update behavior.
+ if (unwrapFirBox) {
+ mlir::acc::UpdateDeviceOp updDesc =
+ createDataEntryOp<mlir::acc::UpdateDeviceOp>(
+ builder, loc, addrOp, asFortranDesc, bounds,
+ /*structured=*/false, /*implicit=*/true,
+ mlir::acc::DataClause::acc_update_device, addrOp.getType(),
+ /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+ llvm::SmallVector<int32_t> seg{0, 0, 0, 1};
+ llvm::SmallVector<mlir::Value> ops{updDesc.getResult()};
+ createSimpleOp<mlir::acc::UpdateOp>(builder, loc, ops, seg);
+ } else {
+ EntryOp descEntryOp = createDataEntryOp<EntryOp>(
+ builder, loc, addrOp, asFortranDesc, bounds,
+ /*structured=*/false, /*implicit=*/true, clause, addrOp.getType(),
+ /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+ mlir::acc::DeclareEnterOp::create(
+ builder, loc,
+ mlir::acc::DeclareTokenType::get(descEntryOp.getContext()),
+ mlir::ValueRange(descEntryOp.getAccVar()));
+ }
if (unwrapFirBox) {
auto loadOp = fir::LoadOp::create(builder, loc, addrOp.getResult());
@@ -4090,15 +4156,27 @@ static void createDeclareDeallocFunc(mlir::OpBuilder &modBuilder,
if (unwrapFirBox)
asFortran << accFirDescriptorPostfix.str();
llvm::SmallVector<mlir::Value> bounds;
- // Use declare_exit for the descriptor to end the structured declare region
- // instead of issuing an update.
- mlir::acc::GetDevicePtrOp descEntryOp =
- createDataEntryOp<mlir::acc::GetDevicePtrOp>(
- builder, loc, addrOp, asFortran, bounds,
- /*structured=*/false, /*implicit=*/true, clause, addrOp.getType(),
- /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
- mlir::acc::DeclareExitOp::create(builder, loc, mlir::Value{},
- mlir::ValueRange(descEntryOp.getAccVar()));
+ if (unwrapFirBox) {
+ // Unwrap mode: update the descriptor after deallocation (no declare_exit).
+ mlir::acc::UpdateDeviceOp updDesc =
+ createDataEntryOp<mlir::acc::UpdateDeviceOp>(
+ builder, loc, addrOp, asFortran, bounds,
+ /*structured=*/false, /*implicit=*/true,
+ mlir::acc::DataClause::acc_update_device, addrOp.getType(),
+ /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+ llvm::SmallVector<int32_t> seg{0, 0, 0, 1};
+ llvm::SmallVector<mlir::Value> ops{updDesc.getResult()};
+ createSimpleOp<mlir::acc::UpdateOp>(builder, loc, ops, seg);
+ } else {
+ // Default: end the structured declare region using declare_exit.
+ mlir::acc::GetDevicePtrOp descEntryOp =
+ createDataEntryOp<mlir::acc::GetDevicePtrOp>(
+ builder, loc, addrOp, asFortran, bounds,
+ /*structured=*/false, /*implicit=*/true, clause, addrOp.getType(),
+ /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+ mlir::acc::DeclareExitOp::create(builder, loc, mlir::Value{},
+ mlir::ValueRange(descEntryOp.getAccVar()));
+ }
modBuilder.setInsertionPointAfter(postDeallocOp);
}
diff --git a/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90 b/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90
index 8ccaae829dfdf..19566b31af108 100644
--- a/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90
+++ b/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90
@@ -1,7 +1,7 @@
! This test checks lowering of OpenACC declare directive in function and
! subroutine specification parts.
-! RUN: bbc -fopenacc -emit-hlfir --openacc-unwrap-fir-box=true --openacc-generate-default-bounds=true %s -o - | FileCheck %s
+! RUN: bbc -fopenacc -emit-hlfir --openacc-unwrap-fir-box=true --openacc-generate-default-bounds=true %s -o - | FileCheck %s
module acc_declare
contains
@@ -258,6 +258,8 @@ subroutine acc_declare_allocate()
! CHECK-LABEL: func.func private @_QMacc_declareFacc_declare_allocateEa_acc_declare_update_desc_post_alloc(
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
+! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "a_desc", structured = false}
+! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: %[[LOAD:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] {acc.declare = #acc.declare<dataClause = acc_create>} : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
! CHECK: %[[CREATE:.*]] = acc.create varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> {name = "a", structured = false}
@@ -279,8 +281,8 @@ subroutine acc_declare_allocate()
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
! CHECK: %[[LOAD:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
-! CHECK: %[[GETDEVICEPTR:.*]] = acc.getdeviceptr varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> {dataClause = #acc<data_clause acc_create>, implicit = true, name = "a_desc", structured = false}
-! CHECK: acc.declare_exit dataOperands(%[[GETDEVICEPTR]] : !fir.heap<!fir.array<?xi32>>)
+! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> {implicit = true, name = "a_desc", structured = false}
+! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.heap<!fir.array<?xi32>>)
! CHECK: return
! CHECK: }
@@ -353,8 +355,8 @@ module acc_declare_allocatable_test
! CHECK-LABEL: func.func private @_QMacc_declare_allocatable_testEdata1_acc_declare_update_desc_post_alloc() {
! CHECK: %[[GLOBAL_ADDR:.*]] = fir.address_of(@_QMacc_declare_allocatable_testEdata1) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: %[[CREATE_DESC:.*]] = acc.create varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "data1_desc", structured = false}
-! CHECK: acc.declare_enter dataOperands(%[[CREATE_DESC]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "data1_desc", structured = false}
+! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: %[[LOAD:.*]] = fir.load %[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: %[[BOXADDR:.*]] = fir.box_addr %[[LOAD]] {acc.declare = #acc.declare<dataClause = acc_create>} : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
! CHECK: %[[CREATE:.*]] = acc.create varPtr(%[[BOXADDR]] : !fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> {name = "data1", structured = false}
@@ -374,8 +376,8 @@ module acc_declare_allocatable_test
! CHECK-LABEL: func.func private @_QMacc_declare_allocatable_testEdata1_acc_declare_update_desc_post_dealloc() {
! CHECK: %[[GLOBAL_ADDR:.*]] = fir.address_of(@_QMacc_declare_allocatable_testEdata1) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {dataClause = #acc<data_clause acc_create>, implicit = true, name = "data1_desc", structured = false}
-! CHECK: acc.declare_exit dataOperands(%[[DEVPTR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "data1_desc", structured = false}
+! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: return
! CHECK: }
@@ -473,4 +475,4 @@ subroutine init()
! CHECK-LABEL: func.func @_QMacc_declare_post_action_statPinit()
! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEx_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: fir.if
-! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEy_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEy_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
\ No newline at end of file
More information about the flang-commits
mailing list