[flang-commits] [flang] [flang][cuda][openacc] Add UseDevice attribute to model host_data use_device symbols (PR #195182)
Zhen Wang via flang-commits
flang-commits at lists.llvm.org
Mon May 4 13:54:27 PDT 2026
https://github.com/wangzpgi updated https://github.com/llvm/llvm-project/pull/195182
>From 02cae04eabf2c78d872fcf464a05cb73f66212e8 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Thu, 30 Apr 2026 14:11:22 -0700
Subject: [PATCH 1/3] add UseDevice attribute
---
.../Dialect/CUF/Attributes/CUFAttr.h | 3 +
flang/include/flang/Support/Fortran.h | 2 +-
flang/lib/Semantics/check-cuda.cpp | 10 ++-
flang/lib/Semantics/check-declarations.cpp | 2 +
flang/lib/Semantics/expression.cpp | 12 +++
flang/lib/Semantics/resolve-names.cpp | 6 +-
flang/lib/Semantics/type.cpp | 2 +-
flang/lib/Support/Fortran.cpp | 4 +
flang/test/Semantics/cuf27.cuf | 85 +++++++++++++++++++
9 files changed, 119 insertions(+), 7 deletions(-)
create mode 100644 flang/test/Semantics/cuf27.cuf
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h b/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h
index f08a89c47170c..88a47749b8808 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h
+++ b/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h
@@ -73,6 +73,9 @@ getDataAttribute(mlir::MLIRContext *mlirContext,
case Fortran::common::CUDADataAttr::Unified:
attr = cuf::DataAttribute::Unified;
break;
+ case Fortran::common::CUDADataAttr::UseDevice:
+ attr = cuf::DataAttribute::Device;
+ break;
case Fortran::common::CUDADataAttr::Value:
return {}; // Extension, not a real CUDA Fortran data attribute
}
diff --git a/flang/include/flang/Support/Fortran.h b/flang/include/flang/Support/Fortran.h
index 4939f1d09067f..c71e1164ef3ab 100644
--- a/flang/include/flang/Support/Fortran.h
+++ b/flang/include/flang/Support/Fortran.h
@@ -66,7 +66,7 @@ ENUM_CLASS(CUDASubprogramAttrs, Host, Device, HostDevice, Global, Grid_Global)
// CUDA data attributes; mutually exclusive
ENUM_CLASS(CUDADataAttr, Constant, Device, Managed, Pinned, Shared, Texture,
- Unified, Value)
+ Unified, UseDevice, Value)
// OpenACC device types
ENUM_CLASS(
diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp
index acf685088fd29..54f3d3654b04b 100644
--- a/flang/lib/Semantics/check-cuda.cpp
+++ b/flang/lib/Semantics/check-cuda.cpp
@@ -124,7 +124,8 @@ static bool IsHostArray(const Symbol &symbol) {
*details->cudaDataAttr() == common::CUDADataAttr::Constant ||
*details->cudaDataAttr() == common::CUDADataAttr::Managed ||
*details->cudaDataAttr() == common::CUDADataAttr::Shared ||
- *details->cudaDataAttr() == common::CUDADataAttr::Unified)) {
+ *details->cudaDataAttr() == common::CUDADataAttr::Unified ||
+ *details->cudaDataAttr() == common::CUDADataAttr::UseDevice)) {
return false;
}
}
@@ -178,7 +179,8 @@ struct FindHostArray
*details->cudaDataAttr() != common::CUDADataAttr::Constant &&
*details->cudaDataAttr() != common::CUDADataAttr::Managed &&
*details->cudaDataAttr() != common::CUDADataAttr::Shared &&
- *details->cudaDataAttr() != common::CUDADataAttr::Unified))) {
+ *details->cudaDataAttr() != common::CUDADataAttr::Unified &&
+ *details->cudaDataAttr() != common::CUDADataAttr::UseDevice))) {
return &symbol;
}
}
@@ -833,7 +835,9 @@ void CUDAChecker::Enter(const parser::PrintStmt &x) {
if (details->cudaDataAttr() &&
(*details->cudaDataAttr() == common::CUDADataAttr::Device ||
*details->cudaDataAttr() ==
- common::CUDADataAttr::Constant)) {
+ common::CUDADataAttr::Constant ||
+ *details->cudaDataAttr() ==
+ common::CUDADataAttr::UseDevice)) {
context_.Say(parser::FindSourceLocation(*x),
"device data not allowed in I/O statements"_err_en_US);
}
diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp
index 4174cf6d1e340..f8e7e13b38136 100644
--- a/flang/lib/Semantics/check-declarations.cpp
+++ b/flang/lib/Semantics/check-declarations.cpp
@@ -1240,6 +1240,8 @@ void CheckHelper::CheckObjectEntity(
messages_.Say(
"ATTRIBUTES(TEXTURE) is obsolete and no longer supported"_err_en_US);
break;
+ case common::CUDADataAttr::UseDevice:
+ break;
}
if (attr != common::CUDADataAttr::Pinned) {
if (details.commonBlock()) {
diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp
index ab0bb5a921be0..3de89a0d8c110 100644
--- a/flang/lib/Semantics/expression.cpp
+++ b/flang/lib/Semantics/expression.cpp
@@ -2930,6 +2930,18 @@ static int GetMatchingDistance(const common::LanguageFeatureControl &features,
return 0;
}
}
+ // UseDevice (OpenACC host_data use_device) has a device address but the
+ // underlying variable is host-resident. Prefer device dummies, but allow
+ // host dummies with a higher distance.
+ if (actualDataAttr && *actualDataAttr == common::CUDADataAttr::UseDevice) {
+ if (!dummyDataAttr)
+ return 3;
+ if (*dummyDataAttr == common::CUDADataAttr::Device)
+ return 0;
+ if (*dummyDataAttr == common::CUDADataAttr::Managed ||
+ *dummyDataAttr == common::CUDADataAttr::Unified)
+ return 2;
+ }
return cudaInfMatchingValue;
}
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index 9ed2e73c155b0..f2adc0eca8949 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -1615,12 +1615,14 @@ bool AccVisitor::Pre(const parser::OpenACCBlockConstruct &x) {
void AccVisitor::CopySymbolWithDevice(const parser::Name *name) {
// New symbols are created for those appearing in the use_device clause.
- // These new symbols get the CUDA device attribute.
+ // These new symbols get the CUDA UseDevice attribute so that generic
+ // resolution can distinguish them from true DEVICE variables: UseDevice
+ // actuals are compatible with both host and device dummy arguments.
if (name && name->symbol) {
Symbol *copy{currScope().CopySymbol(name->symbol->GetUltimate())};
if (copy) {
if (auto *object{copy->GetUltimate().detailsIf<ObjectEntityDetails>()}) {
- object->set_cudaDataAttr(common::CUDADataAttr::Device);
+ object->set_cudaDataAttr(common::CUDADataAttr::UseDevice);
}
} else {
copy = FindInScope(currScope(), name->symbol->GetUltimate().name());
diff --git a/flang/lib/Semantics/type.cpp b/flang/lib/Semantics/type.cpp
index b2fd1e99ff0e8..f2c1637a604a9 100644
--- a/flang/lib/Semantics/type.cpp
+++ b/flang/lib/Semantics/type.cpp
@@ -736,7 +736,7 @@ static const DeclTypeSpec *CloneDerivedTypeForUseDeviceImpl(
if (path.size() == 1) {
if (Symbol * comp{newScope.FindComponent(path[0])}) {
if (auto *details{comp->detailsIf<ObjectEntityDetails>()}) {
- details->set_cudaDataAttr(common::CUDADataAttr::Device);
+ details->set_cudaDataAttr(common::CUDADataAttr::UseDevice);
}
}
}
diff --git a/flang/lib/Support/Fortran.cpp b/flang/lib/Support/Fortran.cpp
index 83a68b4cfa94c..6fcf41cd5e9df 100644
--- a/flang/lib/Support/Fortran.cpp
+++ b/flang/lib/Support/Fortran.cpp
@@ -117,6 +117,10 @@ bool AreCompatibleCUDADataAttrs(std::optional<CUDADataAttr> x,
if (ignoreTKR.test(common::IgnoreTKR::Device)) {
return true;
}
+ // UseDevice (from OpenACC host_data use_device) is compatible with any dummy:
+ // it has a device address but the underlying variable may be host-resident.
+ if (y && *y == CUDADataAttr::UseDevice)
+ return true;
if (!y && isHostDeviceProcedure) {
return true;
}
diff --git a/flang/test/Semantics/cuf27.cuf b/flang/test/Semantics/cuf27.cuf
new file mode 100644
index 0000000000000..a3312f86247ab
--- /dev/null
+++ b/flang/test/Semantics/cuf27.cuf
@@ -0,0 +1,85 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1 -fopenacc
+
+! Test that OpenACC host_data use_device variables (UseDevice attribute)
+! are compatible with both host and device dummy arguments in generic
+! resolution.
+
+module m
+ interface overl
+ module procedure overl_host
+ end interface
+contains
+ subroutine overl_host(x)
+ integer :: x(:)
+ end subroutine
+end module m
+
+module m2
+ interface dforce
+ module procedure dforce_host
+ module procedure dforce_device
+ end interface
+contains
+ subroutine dforce_host(x, y)
+ integer :: x(:), y(:)
+ end subroutine
+ subroutine dforce_device(x, y)
+ integer, device :: x(:), y(:)
+ end subroutine
+end module m2
+
+subroutine test_use_device_host_only()
+ use m
+ integer, allocatable :: fx(:)
+ allocate(fx(100))
+ !$acc data copy(fx)
+ !$acc host_data use_device(fx)
+ call overl(fx)
+ !$acc end host_data
+ !$acc end data
+ deallocate(fx)
+end
+
+subroutine test_use_device_with_device_specific()
+ use m2
+ integer, allocatable :: fx(:)
+ integer, device, allocatable :: fy(:)
+ allocate(fx(100), fy(100))
+ !$acc data copy(fx)
+ !$acc host_data use_device(fx)
+ call dforce(fx, fy)
+ !$acc end host_data
+ !$acc end data
+ deallocate(fx, fy)
+end
+
+module m3
+ integer, parameter :: dp = selected_real_kind(14,200)
+ complex(dp), allocatable, target, pinned :: vkb(:,:)
+ interface dforce2
+ module procedure dforce2_host
+ module procedure dforce2_gpu
+ end interface
+contains
+ subroutine dforce2_host(vkb, c, v)
+ complex(dp) :: vkb(:,:), c(:,:)
+ real(dp) :: v(:,:)
+ end subroutine
+ subroutine dforce2_gpu(vkb, c, v)
+ complex(dp), device :: vkb(:,:), c(:,:)
+ real(dp), device :: v(:,:)
+ end subroutine
+end module m3
+
+subroutine test_use_device_pinned_use_assoc()
+ use m3
+ complex(dp), device, allocatable :: c_d(:,:)
+ real(dp), device, allocatable :: v_d(:,:)
+ allocate(vkb(64,64), c_d(64,64), v_d(64,64))
+ !$acc enter data copyin(vkb)
+ !$acc host_data use_device(vkb)
+ call dforce2(vkb, c_d, v_d)
+ !$acc end host_data
+ !$acc exit data delete(vkb)
+ deallocate(vkb, c_d, v_d)
+end
>From 5fbe82d181873ca4a1159b4ee44cdb95681be3b3 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Thu, 30 Apr 2026 14:55:37 -0700
Subject: [PATCH 2/3] format
---
flang/lib/Semantics/check-cuda.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp
index 54f3d3654b04b..1efc537baf0f2 100644
--- a/flang/lib/Semantics/check-cuda.cpp
+++ b/flang/lib/Semantics/check-cuda.cpp
@@ -180,7 +180,8 @@ struct FindHostArray
*details->cudaDataAttr() != common::CUDADataAttr::Managed &&
*details->cudaDataAttr() != common::CUDADataAttr::Shared &&
*details->cudaDataAttr() != common::CUDADataAttr::Unified &&
- *details->cudaDataAttr() != common::CUDADataAttr::UseDevice))) {
+ *details->cudaDataAttr() !=
+ common::CUDADataAttr::UseDevice))) {
return &symbol;
}
}
>From 9869efff2c6b64f0dabbe0b19e65bd1deeba3233 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Mon, 4 May 2026 13:54:11 -0700
Subject: [PATCH 3/3] update comment
---
flang/lib/Semantics/expression.cpp | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp
index 3de89a0d8c110..312842dc87f21 100644
--- a/flang/lib/Semantics/expression.cpp
+++ b/flang/lib/Semantics/expression.cpp
@@ -2930,9 +2930,15 @@ static int GetMatchingDistance(const common::LanguageFeatureControl &features,
return 0;
}
}
- // UseDevice (OpenACC host_data use_device) has a device address but the
- // underlying variable is host-resident. Prefer device dummies, but allow
- // host dummies with a higher distance.
+ // An actual argument with the UseDevice attribute comes from an OpenACC
+ // host_data use_device clause: the variable itself is host-resident, but
+ // inside the host_data region it is referenced via its device address.
+ // It can therefore match either a host dummy or a device dummy in generic
+ // resolution. The matching distance disambiguates when both kinds of
+ // specifics exist:
+ // - device dummy: 0 (best match: actual carries a device address)
+ // - managed/unified dummy: 2 (acceptable: dummy is reachable from device)
+ // - host dummy (no attr): 3 (acceptable: underlying variable is host)
if (actualDataAttr && *actualDataAttr == common::CUDADataAttr::UseDevice) {
if (!dummyDataAttr)
return 3;
More information about the flang-commits mailing list