[Mlir-commits] [flang] [mlir] [flang][OpenACC] Fix implicit data mapping for deviceptr inside host_data (PR #192710)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Fri Apr 17 11:38:06 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-openacc

@llvm/pr-subscribers-mlir-openacc

Author: khaki3

<details>
<summary>Changes</summary>

Example:

```fortran
module test_mod
  real, allocatable :: a(:,:)
contains
  subroutine f(b)
    real, dimension(*) :: b
    !$acc data deviceptr(b(1:100))
    !$acc serial
    print *, b(10)
    !$acc end serial
    !$acc end data
  end subroutine f
end module test_mod

program test
  use test_mod
  call alloc_a
  !$acc data present(a)
  !$acc host_data use_device(a)
  call f( a(:,5) )
  !$acc end host_data
  !$acc end data
end program
```

When subroutine `f` is inlined, the `ACCImplicitData` pass fails to recognize that `b` is already covered by the enclosing `!$acc data deviceptr`. The deviceptr clause operates on a box (`fir.embox` result) while the inner `acc.serial` uses the underlying ref, so alias analysis cannot match them. The pass falls back to generating an implicit `acc.copyin`/`acc.copyout`, which tries to copy from a device pointer address on the host, causing a segfault.

Fix:
- Register `fir::BoxAddrOp` as implementing `PartialEntityAccessOpInterface` so that `isDeviceValue` can trace through `box_addr` operations.
- In `getOriginalDataClauseOpForAlias`, for `DevicePtrOp` clauses additionally check if the clause variable is directly derived from the candidate (e.g., `deviceptr` operates on `embox(candidate)`).

---
Full diff: https://github.com/llvm/llvm-project/pull/192710.diff


5 Files Affected:

- (modified) flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.h (+1) 
- (modified) flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp (+6) 
- (modified) flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp (+2) 
- (modified) flang/test/Transforms/OpenACC/acc-implicit-data.fir (+34) 
- (modified) mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp (+15-2) 


``````````diff
diff --git a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.h b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.h
index 4ffa0877ff190..854faba45bd2e 100644
--- a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.h
+++ b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.h
@@ -19,6 +19,7 @@
 
 namespace fir {
 class AddrOfOp;
+class BoxAddrOp;
 class DeclareOp;
 class GlobalOp;
 } // namespace fir
diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
index 6d2c6ea5c8e57..eeb1b0bdfa4db 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
@@ -64,6 +64,12 @@ mlir::Value PartialEntityAccessModel<fir::CoordinateOp>::getBaseEntity(
   return mlir::cast<fir::CoordinateOp>(op).getRef();
 }
 
+template <>
+mlir::Value PartialEntityAccessModel<fir::BoxAddrOp>::getBaseEntity(
+    mlir::Operation *op) const {
+  return mlir::cast<fir::BoxAddrOp>(op).getVal();
+}
+
 template <>
 mlir::Value PartialEntityAccessModel<hlfir::DesignateOp>::getBaseEntity(
     mlir::Operation *op) const {
diff --git a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
index f2fa5bf38872d..1766b7cc1d675 100644
--- a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
@@ -53,6 +53,8 @@ void registerOpenACCExtensions(mlir::DialectRegistry &registry) {
         PartialEntityAccessModel<fir::CoordinateOp>>(*ctx);
     fir::DeclareOp::attachInterface<PartialEntityAccessModel<fir::DeclareOp>>(
         *ctx);
+    fir::BoxAddrOp::attachInterface<PartialEntityAccessModel<fir::BoxAddrOp>>(
+        *ctx);
 
     fir::AddrOfOp::attachInterface<AddressOfGlobalModel>(*ctx);
     fir::GlobalOp::attachInterface<GlobalVariableModel>(*ctx);
diff --git a/flang/test/Transforms/OpenACC/acc-implicit-data.fir b/flang/test/Transforms/OpenACC/acc-implicit-data.fir
index 050fe55747d23..58c567c831715 100644
--- a/flang/test/Transforms/OpenACC/acc-implicit-data.fir
+++ b/flang/test/Transforms/OpenACC/acc-implicit-data.fir
@@ -394,3 +394,37 @@ func.func private @_FortranAAllocatableSetBounds(!fir.ref<!fir.box<none>>, i32,
 // CHECK-NOT: acc.copyin
 // CHECK: acc.deviceptr
 // CHECK-NOT: acc.copyout
+
+// -----
+
+// Test that acc.serial inside acc.data deviceptr generates implicit deviceptr
+// (not copyin) when the deviceptr var is an embox of the ref used by serial.
+// This pattern arises when a subroutine with !$acc data deviceptr(b) wrapping
+// !$acc serial is inlined and the deviceptr var is an embox of the ref used
+// by the serial construct.
+func.func @test_serial_inside_data_deviceptr_embox() {
+  %c10 = arith.constant 10 : index
+  %c1 = arith.constant 1 : index
+  %arr = fir.alloca !fir.array<10xf32> {bindc_name = "b"}
+  %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+  %arr_decl = fir.declare %arr(%shape) {uniq_name = "b"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> !fir.ref<!fir.array<10xf32>>
+  %box = fir.embox %arr_decl(%shape) : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<10xf32>>
+  %devptr = acc.deviceptr var(%box : !fir.box<!fir.array<10xf32>>) -> !fir.box<!fir.array<10xf32>> {name = "b(1:10)"}
+  acc.data dataOperands(%devptr : !fir.box<!fir.array<10xf32>>) {
+    acc.serial {
+      %elem = fir.array_coor %arr_decl(%shape) %c1 : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>, index) -> !fir.ref<f32>
+      %val = fir.load %elem : !fir.ref<f32>
+      acc.yield
+    }
+    acc.terminator
+  }
+  return
+}
+
+// CHECK-LABEL: func.func @test_serial_inside_data_deviceptr_embox
+// CHECK: acc.deviceptr var({{.*}} : !fir.box<!fir.array<10xf32>>) -> !fir.box<!fir.array<10xf32>> {name = "b(1:10)"}
+// CHECK: acc.data
+// CHECK: acc.deviceptr varPtr({{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {implicit = true, name = "b"}
+// CHECK: acc.serial dataOperands({{.*}} : !fir.ref<!fir.array<10xf32>>)
+// CHECK-NOT: acc.copyin
+// CHECK-NOT: acc.copyout
diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp
index 95c8d1076ccb0..d543b4f6b99d6 100644
--- a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp
+++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp
@@ -311,9 +311,22 @@ Operation *ACCImplicitData::getOriginalDataClauseOpForAlias(
     if (auto *dataClauseOp = dataClause.getDefiningOp()) {
       // Only accept clauses that guarantee that the alias is present.
       if (isa<acc::CopyinOp, acc::CreateOp, acc::PresentOp, acc::NoCreateOp,
-              acc::DevicePtrOp>(dataClauseOp))
-        if (aliasAnalysis.alias(acc::getVar(dataClauseOp), var).isMust())
+              acc::DevicePtrOp>(dataClauseOp)) {
+        Value clauseVar = acc::getVar(dataClauseOp);
+        if (aliasAnalysis.alias(clauseVar, var).isMust())
           return dataClauseOp;
+        // For deviceptr clauses, also check if the clause variable is
+        // directly derived from 'var' (e.g., deviceptr operates on
+        // embox(var) — the box wrapping var). This arises when a
+        // subroutine with deviceptr is inlined and the deviceptr's box
+        // and the compute region's ref are different SSA values.
+        if (isa<acc::DevicePtrOp>(dataClauseOp)) {
+          for (Operation *user : var.getUsers()) {
+            if (llvm::is_contained(user->getResults(), clauseVar))
+              return dataClauseOp;
+          }
+        }
+      }
     }
   }
   return nullptr;

``````````

</details>


https://github.com/llvm/llvm-project/pull/192710


More information about the Mlir-commits mailing list