[flang-commits] [flang] [flang][cuda] Allow CUDA variable for assumed-type dummy (PR #196135)

via flang-commits flang-commits at lists.llvm.org
Wed May 6 10:54:56 PDT 2026


llvmorg-github-actions[bot] wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-flang-semantics

Author: Valentin Clement (バレンタイン クレメン) (clementval)

<details>
<summary>Changes</summary>



---
Full diff: https://github.com/llvm/llvm-project/pull/196135.diff


3 Files Affected:

- (modified) flang/lib/Semantics/check-call.cpp (+14-1) 
- (modified) flang/lib/Semantics/expression.cpp (+19) 
- (modified) flang/test/Semantics/cuf10.cuf (+24) 


``````````diff
diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp
index 2dd47508a0b3f..16699a9a026bd 100644
--- a/flang/lib/Semantics/check-call.cpp
+++ b/flang/lib/Semantics/check-call.cpp
@@ -345,6 +345,15 @@ static const llvm::StringSet<> cudaSkippedIntrinsics = {"__builtin_c_devloc",
     "allocated", "associated", "kind", "lbound", "loc", "present", "shape",
     "size", "sizeof", "ubound"};
 
+static bool IsCudaAddressSpaceAgnostic(
+    const characteristics::DummyDataObject &dummy) {
+  return !dummy.cudaDataAttr && dummy.type.type().IsAssumedType() &&
+      (dummy.type.attrs().test(
+           characteristics::TypeAndShape::Attr::AssumedSize) ||
+          dummy.type.attrs().test(
+              characteristics::TypeAndShape::Attr::AssumedRank));
+}
+
 static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy,
     const std::string &dummyName, evaluate::Expr<evaluate::SomeType> &actual,
     characteristics::TypeAndShape &actualType, bool isElemental,
@@ -1166,7 +1175,11 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy,
     bool isHostDeviceProc{procedure.cudaSubprogramAttrs &&
         *procedure.cudaSubprogramAttrs ==
             common::CUDASubprogramAttrs::HostDevice};
-    if (!common::AreCompatibleCUDADataAttrs(dummyDataAttr, actualDataAttr,
+    // TYPE(*) assumed-size/rank dummies are opaque buffers (e.g. MPI) and do
+    // not impose a CUDA address space on their actual argument.
+    bool skipCudaDataAttrCheck{IsCudaAddressSpaceAgnostic(dummy)};
+    if (!skipCudaDataAttrCheck &&
+        !common::AreCompatibleCUDADataAttrs(dummyDataAttr, actualDataAttr,
             dummy.ignoreTKR, /*allowUnifiedMatchingRule=*/true,
             isHostDeviceProc, &context.languageFeatures())) {
       auto toStr{[](std::optional<common::CUDADataAttr> x) {
diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp
index ab0bb5a921be0..ffdb1ab6a8c16 100644
--- a/flang/lib/Semantics/expression.cpp
+++ b/flang/lib/Semantics/expression.cpp
@@ -2839,6 +2839,15 @@ static int CompareCudaMatchingDistance(
   return 0;
 }
 
+static bool IsCudaAddressSpaceAgnostic(
+    const characteristics::DummyDataObject &dummy) {
+  return !dummy.cudaDataAttr && dummy.type.type().IsAssumedType() &&
+      (dummy.type.attrs().test(
+           characteristics::TypeAndShape::Attr::AssumedSize) ||
+          dummy.type.attrs().test(
+              characteristics::TypeAndShape::Attr::AssumedRank));
+}
+
 // Compute the matching distance as described in section 3.2.3 of the CUDA
 // Fortran references.
 static int GetMatchingDistance(const common::LanguageFeatureControl &features,
@@ -2875,14 +2884,24 @@ static int GetMatchingDistance(const common::LanguageFeatureControl &features,
     }
   }
 
+  bool dummyIsCudaAddressSpaceAgnostic{false};
   common::visit(common::visitors{
                     [&](const characteristics::DummyDataObject &object) {
                       dummyDataAttr = object.cudaDataAttr;
+                      dummyIsCudaAddressSpaceAgnostic =
+                          IsCudaAddressSpaceAgnostic(object);
                     },
                     [&](const auto &) {},
                 },
       dummy.u);
 
+  if (actualDataAttr && dummyIsCudaAddressSpaceAgnostic) {
+    // TYPE(*) assumed-size/rank dummies model opaque buffers, so they can
+    // accept host or device storage. Keep a non-zero distance so an explicit
+    // DEVICE overload remains a better CUDA match.
+    return 3;
+  }
+
   if (!dummyDataAttr) {
     if (!actualDataAttr) {
       if (isCudaUnified || isCudaManaged) {
diff --git a/flang/test/Semantics/cuf10.cuf b/flang/test/Semantics/cuf10.cuf
index 86637a92b2196..df45f4324b2c2 100644
--- a/flang/test/Semantics/cuf10.cuf
+++ b/flang/test/Semantics/cuf10.cuf
@@ -72,3 +72,27 @@ module m
   end subroutine
 
 end
+
+module assumed_type_cuda_buffers
+  interface generic_recv
+    module procedure recv_assumed_size
+  end interface
+
+contains
+  subroutine recv_assumed_size(buf, count)
+    type(*) :: buf(1_8:*)
+    integer, intent(in) :: count
+  end subroutine
+
+  subroutine recv_assumed_rank(buf, count)
+    type(*) :: buf(..)
+    integer, intent(in) :: count
+  end subroutine
+
+  subroutine test
+    real, device :: d(10)
+    call recv_assumed_size(d, 10)
+    call recv_assumed_rank(d, 10)
+    call generic_recv(d, 10)
+  end subroutine
+end module

``````````

</details>


https://github.com/llvm/llvm-project/pull/196135


More information about the flang-commits mailing list