[flang-commits] [flang] [flang][cuda][openacc] Add UseDevice attribute to model host_data use_device symbols (PR #195182)

Zhen Wang via flang-commits flang-commits at lists.llvm.org
Thu Apr 30 14:55:53 PDT 2026


https://github.com/wangzpgi updated https://github.com/llvm/llvm-project/pull/195182

>From 02cae04eabf2c78d872fcf464a05cb73f66212e8 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Thu, 30 Apr 2026 14:11:22 -0700
Subject: [PATCH 1/2] add UseDevice attribute

---
 .../Dialect/CUF/Attributes/CUFAttr.h          |  3 +
 flang/include/flang/Support/Fortran.h         |  2 +-
 flang/lib/Semantics/check-cuda.cpp            | 10 ++-
 flang/lib/Semantics/check-declarations.cpp    |  2 +
 flang/lib/Semantics/expression.cpp            | 12 +++
 flang/lib/Semantics/resolve-names.cpp         |  6 +-
 flang/lib/Semantics/type.cpp                  |  2 +-
 flang/lib/Support/Fortran.cpp                 |  4 +
 flang/test/Semantics/cuf27.cuf                | 85 +++++++++++++++++++
 9 files changed, 119 insertions(+), 7 deletions(-)
 create mode 100644 flang/test/Semantics/cuf27.cuf

diff --git a/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h b/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h
index f08a89c47170c..88a47749b8808 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h
+++ b/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h
@@ -73,6 +73,9 @@ getDataAttribute(mlir::MLIRContext *mlirContext,
     case Fortran::common::CUDADataAttr::Unified:
       attr = cuf::DataAttribute::Unified;
       break;
+    case Fortran::common::CUDADataAttr::UseDevice:
+      attr = cuf::DataAttribute::Device;
+      break;
     case Fortran::common::CUDADataAttr::Value:
       return {}; // Extension, not a real CUDA Fortran data attribute
     }
diff --git a/flang/include/flang/Support/Fortran.h b/flang/include/flang/Support/Fortran.h
index 4939f1d09067f..c71e1164ef3ab 100644
--- a/flang/include/flang/Support/Fortran.h
+++ b/flang/include/flang/Support/Fortran.h
@@ -66,7 +66,7 @@ ENUM_CLASS(CUDASubprogramAttrs, Host, Device, HostDevice, Global, Grid_Global)
 
 // CUDA data attributes; mutually exclusive
 ENUM_CLASS(CUDADataAttr, Constant, Device, Managed, Pinned, Shared, Texture,
-    Unified, Value)
+    Unified, UseDevice, Value)
 
 // OpenACC device types
 ENUM_CLASS(
diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp
index acf685088fd29..54f3d3654b04b 100644
--- a/flang/lib/Semantics/check-cuda.cpp
+++ b/flang/lib/Semantics/check-cuda.cpp
@@ -124,7 +124,8 @@ static bool IsHostArray(const Symbol &symbol) {
             *details->cudaDataAttr() == common::CUDADataAttr::Constant ||
             *details->cudaDataAttr() == common::CUDADataAttr::Managed ||
             *details->cudaDataAttr() == common::CUDADataAttr::Shared ||
-            *details->cudaDataAttr() == common::CUDADataAttr::Unified)) {
+            *details->cudaDataAttr() == common::CUDADataAttr::Unified ||
+            *details->cudaDataAttr() == common::CUDADataAttr::UseDevice)) {
       return false;
     }
   }
@@ -178,7 +179,8 @@ struct FindHostArray
                   *details->cudaDataAttr() != common::CUDADataAttr::Constant &&
                   *details->cudaDataAttr() != common::CUDADataAttr::Managed &&
                   *details->cudaDataAttr() != common::CUDADataAttr::Shared &&
-                  *details->cudaDataAttr() != common::CUDADataAttr::Unified))) {
+                  *details->cudaDataAttr() != common::CUDADataAttr::Unified &&
+                  *details->cudaDataAttr() != common::CUDADataAttr::UseDevice))) {
         return &symbol;
       }
     }
@@ -833,7 +835,9 @@ void CUDAChecker::Enter(const parser::PrintStmt &x) {
             if (details->cudaDataAttr() &&
                 (*details->cudaDataAttr() == common::CUDADataAttr::Device ||
                     *details->cudaDataAttr() ==
-                        common::CUDADataAttr::Constant)) {
+                        common::CUDADataAttr::Constant ||
+                    *details->cudaDataAttr() ==
+                        common::CUDADataAttr::UseDevice)) {
               context_.Say(parser::FindSourceLocation(*x),
                   "device data not allowed in I/O statements"_err_en_US);
             }
diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp
index 4174cf6d1e340..f8e7e13b38136 100644
--- a/flang/lib/Semantics/check-declarations.cpp
+++ b/flang/lib/Semantics/check-declarations.cpp
@@ -1240,6 +1240,8 @@ void CheckHelper::CheckObjectEntity(
       messages_.Say(
           "ATTRIBUTES(TEXTURE) is obsolete and no longer supported"_err_en_US);
       break;
+    case common::CUDADataAttr::UseDevice:
+      break;
     }
     if (attr != common::CUDADataAttr::Pinned) {
       if (details.commonBlock()) {
diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp
index ab0bb5a921be0..3de89a0d8c110 100644
--- a/flang/lib/Semantics/expression.cpp
+++ b/flang/lib/Semantics/expression.cpp
@@ -2930,6 +2930,18 @@ static int GetMatchingDistance(const common::LanguageFeatureControl &features,
       return 0;
     }
   }
+  // UseDevice (OpenACC host_data use_device) has a device address but the
+  // underlying variable may be host-resident.  Prefer device dummies (0), then
+  // managed/unified dummies (2), then host dummies with no attribute (3).
+  if (actualDataAttr && *actualDataAttr == common::CUDADataAttr::UseDevice) {
+    if (!dummyDataAttr)
+      return 3;
+    if (*dummyDataAttr == common::CUDADataAttr::Device)
+      return 0;
+    if (*dummyDataAttr == common::CUDADataAttr::Managed ||
+        *dummyDataAttr == common::CUDADataAttr::Unified)
+      return 2;
+  }
   return cudaInfMatchingValue;
 }
 
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index 9ed2e73c155b0..f2adc0eca8949 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -1615,12 +1615,14 @@ bool AccVisitor::Pre(const parser::OpenACCBlockConstruct &x) {
 
 void AccVisitor::CopySymbolWithDevice(const parser::Name *name) {
   // New symbols are created for those appearing in the use_device clause.
-  // These new symbols get the CUDA device attribute.
+  // These new symbols get the CUDA UseDevice attribute so that generic
+  // resolution can distinguish them from true DEVICE variables: UseDevice
+  // actuals are compatible with both host and device dummy arguments.
   if (name && name->symbol) {
     Symbol *copy{currScope().CopySymbol(name->symbol->GetUltimate())};
     if (copy) {
       if (auto *object{copy->GetUltimate().detailsIf<ObjectEntityDetails>()}) {
-        object->set_cudaDataAttr(common::CUDADataAttr::Device);
+        object->set_cudaDataAttr(common::CUDADataAttr::UseDevice);
       }
     } else {
       copy = FindInScope(currScope(), name->symbol->GetUltimate().name());
diff --git a/flang/lib/Semantics/type.cpp b/flang/lib/Semantics/type.cpp
index b2fd1e99ff0e8..f2c1637a604a9 100644
--- a/flang/lib/Semantics/type.cpp
+++ b/flang/lib/Semantics/type.cpp
@@ -736,7 +736,7 @@ static const DeclTypeSpec *CloneDerivedTypeForUseDeviceImpl(
   if (path.size() == 1) {
     if (Symbol * comp{newScope.FindComponent(path[0])}) {
       if (auto *details{comp->detailsIf<ObjectEntityDetails>()}) {
-        details->set_cudaDataAttr(common::CUDADataAttr::Device);
+        details->set_cudaDataAttr(common::CUDADataAttr::UseDevice);
       }
     }
   }
diff --git a/flang/lib/Support/Fortran.cpp b/flang/lib/Support/Fortran.cpp
index 83a68b4cfa94c..6fcf41cd5e9df 100644
--- a/flang/lib/Support/Fortran.cpp
+++ b/flang/lib/Support/Fortran.cpp
@@ -117,6 +117,10 @@ bool AreCompatibleCUDADataAttrs(std::optional<CUDADataAttr> x,
   if (ignoreTKR.test(common::IgnoreTKR::Device)) {
     return true;
   }
+  // UseDevice (from OpenACC host_data use_device) is compatible with any dummy:
+  // it has a device address but the underlying variable may be host-resident.
+  if (y && *y == CUDADataAttr::UseDevice)
+    return true;
   if (!y && isHostDeviceProcedure) {
     return true;
   }
diff --git a/flang/test/Semantics/cuf27.cuf b/flang/test/Semantics/cuf27.cuf
new file mode 100644
index 0000000000000..a3312f86247ab
--- /dev/null
+++ b/flang/test/Semantics/cuf27.cuf
@@ -0,0 +1,85 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1 -fopenacc
+
+! Test that OpenACC host_data use_device variables (UseDevice attribute)
+! are compatible with both host and device dummy arguments in generic
+! resolution.
+
+module m
+  interface overl
+    module procedure overl_host
+  end interface
+contains
+  subroutine overl_host(x)
+    integer :: x(:)
+  end subroutine
+end module m
+
+module m2
+  interface dforce
+    module procedure dforce_host
+    module procedure dforce_device
+  end interface
+contains
+  subroutine dforce_host(x, y)
+    integer :: x(:), y(:)
+  end subroutine
+  subroutine dforce_device(x, y)
+    integer, device :: x(:), y(:)
+  end subroutine
+end module m2
+
+subroutine test_use_device_host_only()
+  use m
+  integer, allocatable :: fx(:)
+  allocate(fx(100))
+  !$acc data copy(fx)
+  !$acc host_data use_device(fx)
+  call overl(fx)
+  !$acc end host_data
+  !$acc end data
+  deallocate(fx)
+end
+
+subroutine test_use_device_with_device_specific()
+  use m2
+  integer, allocatable :: fx(:)
+  integer, device, allocatable :: fy(:)
+  allocate(fx(100), fy(100))
+  !$acc data copy(fx)
+  !$acc host_data use_device(fx)
+  call dforce(fx, fy)
+  !$acc end host_data
+  !$acc end data
+  deallocate(fx, fy)
+end
+
+module m3
+  integer, parameter :: dp = selected_real_kind(14,200)
+  complex(dp), allocatable, target, pinned :: vkb(:,:)
+  interface dforce2
+    module procedure dforce2_host
+    module procedure dforce2_gpu
+  end interface
+contains
+  subroutine dforce2_host(vkb, c, v)
+    complex(dp) :: vkb(:,:), c(:,:)
+    real(dp) :: v(:,:)
+  end subroutine
+  subroutine dforce2_gpu(vkb, c, v)
+    complex(dp), device :: vkb(:,:), c(:,:)
+    real(dp), device :: v(:,:)
+  end subroutine
+end module m3
+
+subroutine test_use_device_pinned_use_assoc()
+  use m3
+  complex(dp), device, allocatable :: c_d(:,:)
+  real(dp), device, allocatable :: v_d(:,:)
+  allocate(vkb(64,64), c_d(64,64), v_d(64,64))
+  !$acc enter data copyin(vkb)
+  !$acc host_data use_device(vkb)
+  call dforce2(vkb, c_d, v_d)
+  !$acc end host_data
+  !$acc exit data delete(vkb)
+  deallocate(vkb, c_d, v_d)
+end

>From 5fbe82d181873ca4a1159b4ee44cdb95681be3b3 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Thu, 30 Apr 2026 14:55:37 -0700
Subject: [PATCH 2/2] format

---
 flang/lib/Semantics/check-cuda.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp
index 54f3d3654b04b..1efc537baf0f2 100644
--- a/flang/lib/Semantics/check-cuda.cpp
+++ b/flang/lib/Semantics/check-cuda.cpp
@@ -180,7 +180,8 @@ struct FindHostArray
                   *details->cudaDataAttr() != common::CUDADataAttr::Managed &&
                   *details->cudaDataAttr() != common::CUDADataAttr::Shared &&
                   *details->cudaDataAttr() != common::CUDADataAttr::Unified &&
-                  *details->cudaDataAttr() != common::CUDADataAttr::UseDevice))) {
+                  *details->cudaDataAttr() !=
+                      common::CUDADataAttr::UseDevice))) {
         return &symbol;
       }
     }



More information about the flang-commits mailing list