[flang-commits] [flang] [flang] [cuda] Exclude non-variable actuals from unified-memory CUDA data attribute relaxation (PR #206121)

Fri Jun 26 09:06:04 PDT 2026

https://github.com/wangzpgi created https://github.com/llvm/llvm-project/pull/206121

Under -gpu=mem:unified, the CUDA data attribute compatibility check (AreCompatibleCUDADataAttrs) allows an actual argument with no CUDA data attribute to bind to a Device/Managed/Unified dummy (the check applies the same relaxation when the CudaManaged feature is enabled). This relaxation is correct for variables, whose storage is accessible from device code under unified memory, but not for non-variable expression results (e.g. intrinsic call results like RESHAPE(...)), which are host temporaries.

Without this fix, a generic SUM call like sum(reshape(hostArray, ...), dim=2) could resolve to a device-specific overload instead of the intrinsic, because the RESHAPE result was incorrectly considered compatible with a DEVICE-attributed dummy.

This patch:

- Adds an actualIsVariable parameter to AreCompatibleCUDADataAttrs (defaulting to true for backward compatibility) and gates the unified-memory relaxation for Device/Managed/Unified dummies on it.
- Passes actualIsVariable from CheckExplicitDataArg in check-call.
- Stops setting actualCanUseImplicitCudaMemoryMode for non-variable expressions in GetMatchingDistance, so the CUDA matching distance ranking is also correct.

From 082da5518a6c6ceef49eee6da2332c70177349aa Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Thu, 25 Jun 2026 14:04:32 -0700
Subject: [PATCH 1/2] Do not promote non-variable actuals to Device dummies
 under unified memory

---
 flang/lib/Semantics/expression.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp
index 02263fae0e55d..5ba8133bfbd00 100644
--- a/flang/lib/Semantics/expression.cpp
+++ b/flang/lib/Semantics/expression.cpp
@@ -2892,7 +2892,11 @@ static int GetMatchingDistance(const common::LanguageFeatureControl &features,
           }
         }
       } else if (const auto *actualLastSymbol{evaluate::GetLastSymbol(*expr)}) {
-        actualCanUseImplicitCudaMemoryMode = true;
+        // Propagate any explicit CUDA data attribute from the referenced
+        // symbol (e.g. a device array operand inside RESHAPE()) so that
+        // Device-attributed dummies still match. Do NOT set
+        // actualCanUseImplicitCudaMemoryMode: the expression result is a
+        // temporary, not a user variable with unified/managed storage.
         const Symbol &resolved{
             semantics::ResolveAssociations(*actualLastSymbol)};
         if (const auto *actualObject{

From 51884dba1acfc8e926a7de1b8bb21e202c1697d2 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Thu, 25 Jun 2026 18:25:39 -0700
Subject: [PATCH 2/2] Exclude non-variable actuals from unified-memory CUDA
 data attribute relaxation

---
 flang/include/flang/Support/Fortran.h |  3 ++-
 flang/lib/Semantics/check-call.cpp    |  3 ++-
 flang/lib/Support/Fortran.cpp         | 11 +++++++----
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/flang/include/flang/Support/Fortran.h b/flang/include/flang/Support/Fortran.h
index 1118b2f8080a8..95290e7a36b20 100644
--- a/flang/include/flang/Support/Fortran.h
+++ b/flang/include/flang/Support/Fortran.h
@@ -104,7 +104,8 @@ std::string AsFortran(IgnoreTKRSet);
 bool AreCompatibleCUDADataAttrs(std::optional<CUDADataAttr>,
     std::optional<CUDADataAttr>, IgnoreTKRSet, bool allowUnifiedMatchingRule,
     bool isHostDeviceProcedure,
-    const LanguageFeatureControl *features = nullptr);
+    const LanguageFeatureControl *features = nullptr,
+    bool actualIsVariable = true);
 
 // Format vector type as Fortran string
 std::string FormatVectorTypeAsFortran(
diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp
index f91b9b1d0b67d..464e555608601 100644
--- a/flang/lib/Semantics/check-call.cpp
+++ b/flang/lib/Semantics/check-call.cpp
@@ -1186,7 +1186,8 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy,
     if (!skipCudaDataAttrCheck &&
         !common::AreCompatibleCUDADataAttrs(dummyDataAttr, actualDataAttr,
             dummy.ignoreTKR, /*allowUnifiedMatchingRule=*/true,
-            isHostDeviceProc, &context.languageFeatures())) {
+            isHostDeviceProc, &context.languageFeatures(),
+            actualIsVariable)) {
       auto toStr{[](std::optional<common::CUDADataAttr> x) {
         return x ? "ATTRIBUTES("s +
                 parser::ToUpperCaseLetters(common::EnumToString(*x)) + ")"s
diff --git a/flang/lib/Support/Fortran.cpp b/flang/lib/Support/Fortran.cpp
index e3263df9fcbf7..80243d2b10b22 100644
--- a/flang/lib/Support/Fortran.cpp
+++ b/flang/lib/Support/Fortran.cpp
@@ -108,7 +108,7 @@ std::string AsFortran(IgnoreTKRSet tkr) {
 bool AreCompatibleCUDADataAttrs(std::optional<CUDADataAttr> x,
     std::optional<CUDADataAttr> y, IgnoreTKRSet ignoreTKR,
     bool allowUnifiedMatchingRule, bool isHostDeviceProcedure,
-    const LanguageFeatureControl *features) {
+    const LanguageFeatureControl *features, bool actualIsVariable) {
   bool isCudaManaged{features
           ? features->IsEnabled(common::LanguageFeature::CudaManaged)
           : false};
@@ -160,18 +160,21 @@ bool AreCompatibleCUDADataAttrs(std::optional<CUDADataAttr> x,
         // by host modules to mark device-typed dummies as overload
         // discriminators that should only accept actuals with an explicit
         // device/managed/unified attribute.
+        // Non-variable actuals (expression results, intrinsic call results)
+        // are host temporaries whose storage is not accessible from device
+        // code even under unified memory, so the relaxation does not apply.
         if (!y && (isCudaUnified || isCudaManaged) &&
-            !ignoreTKR.test(IgnoreTKR::Managed)) {
+            !ignoreTKR.test(IgnoreTKR::Managed) && actualIsVariable) {
           return true;
         }
       } else if (*x == CUDADataAttr::Managed) {
         if ((y && *y == CUDADataAttr::Unified) ||
-            (!y && (isCudaUnified || isCudaManaged))) {
+            (!y && (isCudaUnified || isCudaManaged) && actualIsVariable)) {
           return true;
         }
       } else if (*x == CUDADataAttr::Unified) {
         if ((y && *y == CUDADataAttr::Managed) ||
-            (!y && (isCudaUnified || isCudaManaged))) {
+            (!y && (isCudaUnified || isCudaManaged) && actualIsVariable)) {
           return true;
         }
       }