[Mlir-commits] [flang] [mlir] [flang][OpenACC] Fix implicit data mapping for deviceptr inside host_data (PR #192710)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Fri Apr 17 11:38:06 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-openacc
@llvm/pr-subscribers-mlir-openacc
Author: khaki3
<details>
<summary>Changes</summary>
Example:
```fortran
module test_mod
real, allocatable :: a(:,:)
contains
subroutine f(b)
real, dimension(*) :: b
!$acc data deviceptr(b(1:100))
!$acc serial
print *, b(10)
!$acc end serial
!$acc end data
end subroutine f
end module test_mod
program test
use test_mod
call alloc_a
!$acc data present(a)
!$acc host_data use_device(a)
call f( a(:,5) )
!$acc end host_data
!$acc end data
end program
```
When subroutine `f` is inlined, the `ACCImplicitData` pass fails to recognize that `b` is already covered by the enclosing `!$acc data deviceptr`. The deviceptr clause operates on a box (`fir.embox` result) while the inner `acc.serial` uses the underlying ref, so alias analysis cannot match them. The pass falls back to generating an implicit `acc.copyin`/`acc.copyout`, which tries to copy from a device pointer address on the host, causing a segfault.
Fix:
- Register `fir::BoxAddrOp` as implementing `PartialEntityAccessOpInterface` so that `isDeviceValue` can trace through `box_addr` operations.
- In `getOriginalDataClauseOpForAlias`, for `DevicePtrOp` clauses additionally check if the clause variable is directly derived from the candidate (e.g., `deviceptr` operates on `embox(candidate)`).
---
Full diff: https://github.com/llvm/llvm-project/pull/192710.diff
5 Files Affected:
- (modified) flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.h (+1)
- (modified) flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp (+6)
- (modified) flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp (+2)
- (modified) flang/test/Transforms/OpenACC/acc-implicit-data.fir (+34)
- (modified) mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp (+15-2)
``````````diff
diff --git a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.h b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.h
index 4ffa0877ff190..854faba45bd2e 100644
--- a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.h
+++ b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.h
@@ -19,6 +19,7 @@
namespace fir {
class AddrOfOp;
+class BoxAddrOp;
class DeclareOp;
class GlobalOp;
} // namespace fir
diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
index 6d2c6ea5c8e57..eeb1b0bdfa4db 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
@@ -64,6 +64,12 @@ mlir::Value PartialEntityAccessModel<fir::CoordinateOp>::getBaseEntity(
return mlir::cast<fir::CoordinateOp>(op).getRef();
}
+template <>
+mlir::Value PartialEntityAccessModel<fir::BoxAddrOp>::getBaseEntity(
+ mlir::Operation *op) const {
+ return mlir::cast<fir::BoxAddrOp>(op).getVal();
+}
+
template <>
mlir::Value PartialEntityAccessModel<hlfir::DesignateOp>::getBaseEntity(
mlir::Operation *op) const {
diff --git a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
index f2fa5bf38872d..1766b7cc1d675 100644
--- a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
@@ -53,6 +53,8 @@ void registerOpenACCExtensions(mlir::DialectRegistry ®istry) {
PartialEntityAccessModel<fir::CoordinateOp>>(*ctx);
fir::DeclareOp::attachInterface<PartialEntityAccessModel<fir::DeclareOp>>(
*ctx);
+ fir::BoxAddrOp::attachInterface<PartialEntityAccessModel<fir::BoxAddrOp>>(
+ *ctx);
fir::AddrOfOp::attachInterface<AddressOfGlobalModel>(*ctx);
fir::GlobalOp::attachInterface<GlobalVariableModel>(*ctx);
diff --git a/flang/test/Transforms/OpenACC/acc-implicit-data.fir b/flang/test/Transforms/OpenACC/acc-implicit-data.fir
index 050fe55747d23..58c567c831715 100644
--- a/flang/test/Transforms/OpenACC/acc-implicit-data.fir
+++ b/flang/test/Transforms/OpenACC/acc-implicit-data.fir
@@ -394,3 +394,37 @@ func.func private @_FortranAAllocatableSetBounds(!fir.ref<!fir.box<none>>, i32,
// CHECK-NOT: acc.copyin
// CHECK: acc.deviceptr
// CHECK-NOT: acc.copyout
+
+// -----
+
+// Test that acc.serial inside acc.data deviceptr generates implicit deviceptr
+// (not copyin) when the deviceptr var is an embox of the ref used by serial.
+// This pattern arises when a subroutine with !$acc data deviceptr(b) wrapping
+// !$acc serial is inlined and the deviceptr var is an embox of the ref used
+// by the serial construct.
+func.func @test_serial_inside_data_deviceptr_embox() {
+ %c10 = arith.constant 10 : index
+ %c1 = arith.constant 1 : index
+ %arr = fir.alloca !fir.array<10xf32> {bindc_name = "b"}
+ %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+ %arr_decl = fir.declare %arr(%shape) {uniq_name = "b"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> !fir.ref<!fir.array<10xf32>>
+ %box = fir.embox %arr_decl(%shape) : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<10xf32>>
+ %devptr = acc.deviceptr var(%box : !fir.box<!fir.array<10xf32>>) -> !fir.box<!fir.array<10xf32>> {name = "b(1:10)"}
+ acc.data dataOperands(%devptr : !fir.box<!fir.array<10xf32>>) {
+ acc.serial {
+ %elem = fir.array_coor %arr_decl(%shape) %c1 : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>, index) -> !fir.ref<f32>
+ %val = fir.load %elem : !fir.ref<f32>
+ acc.yield
+ }
+ acc.terminator
+ }
+ return
+}
+
+// CHECK-LABEL: func.func @test_serial_inside_data_deviceptr_embox
+// CHECK: acc.deviceptr var({{.*}} : !fir.box<!fir.array<10xf32>>) -> !fir.box<!fir.array<10xf32>> {name = "b(1:10)"}
+// CHECK: acc.data
+// CHECK: acc.deviceptr varPtr({{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {implicit = true, name = "b"}
+// CHECK: acc.serial dataOperands({{.*}} : !fir.ref<!fir.array<10xf32>>)
+// CHECK-NOT: acc.copyin
+// CHECK-NOT: acc.copyout
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp
index 95c8d1076ccb0..d543b4f6b99d6 100644
--- a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp
+++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp
@@ -311,9 +311,22 @@ Operation *ACCImplicitData::getOriginalDataClauseOpForAlias(
if (auto *dataClauseOp = dataClause.getDefiningOp()) {
// Only accept clauses that guarantee that the alias is present.
if (isa<acc::CopyinOp, acc::CreateOp, acc::PresentOp, acc::NoCreateOp,
- acc::DevicePtrOp>(dataClauseOp))
- if (aliasAnalysis.alias(acc::getVar(dataClauseOp), var).isMust())
+ acc::DevicePtrOp>(dataClauseOp)) {
+ Value clauseVar = acc::getVar(dataClauseOp);
+ if (aliasAnalysis.alias(clauseVar, var).isMust())
return dataClauseOp;
+ // For deviceptr clauses, also check if the clause variable is
+ // directly derived from 'var' (e.g., deviceptr operates on
+ // embox(var) — the box wrapping var). This arises when a
+ // subroutine with deviceptr is inlined and the deviceptr's box
+ // and the compute region's ref are different SSA values.
+ if (isa<acc::DevicePtrOp>(dataClauseOp)) {
+ for (Operation *user : var.getUsers()) {
+ if (llvm::is_contained(user->getResults(), clauseVar))
+ return dataClauseOp;
+ }
+ }
+ }
}
}
return nullptr;
``````````
</details>
https://github.com/llvm/llvm-project/pull/192710
More information about the Mlir-commits
mailing list