[flang-commits] [flang] Revert "[flang][cuda] Do not emit data transfer for constant read on the rhs" (PR #205394)

Valentin Clement バレンタイン クレメン via flang-commits flang-commits at lists.llvm.org
Tue Jun 23 11:03:09 PDT 2026


https://github.com/clementval created https://github.com/llvm/llvm-project/pull/205394

Reverts llvm/llvm-project#205185

This is causing a couple of downstream tests to fail. Another approach is needed.

>From c2b7927c6543349cc97781dcdf41ed12c2223be7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?=
 =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?=
 =?UTF-8?q?=E3=83=B3=29?= <clementval at gmail.com>
Date: Tue, 23 Jun 2026 11:02:36 -0700
Subject: [PATCH] =?UTF-8?q?Revert=20"[flang][cuda]=20Do=20not=20emit=20dat?=
 =?UTF-8?q?a=20transfer=20for=20constant=20read=20on=20the=20rhs=20?=
 =?UTF-8?q?=E2=80=A6"?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 7781a0c196acda252a1db67936b76e96e9971f4c.
---
 flang/include/flang/Evaluate/tools.h         | 27 --------------------
 flang/lib/Evaluate/tools.cpp                 |  3 ++-
 flang/test/Lower/CUDA/cuda-data-transfer.cuf | 14 +---------
 3 files changed, 3 insertions(+), 41 deletions(-)

diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h
index 08468f304914b..d2d0b69e6337d 100644
--- a/flang/include/flang/Evaluate/tools.h
+++ b/flang/include/flang/Evaluate/tools.h
@@ -1321,15 +1321,6 @@ inline bool IsCUDAManagedOrUnifiedSymbol(const Symbol &sym) {
   return false;
 }
 
-inline bool IsCUDAConstantSymbol(const Symbol &sym) {
-  if (const auto *details =
-          sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()) {
-    return details->cudaDataAttr() &&
-        (*details->cudaDataAttr() == common::CUDADataAttr::Constant);
-  }
-  return false;
-}
-
 // Non-allocatable module-level managed/unified variables use pointer
 // indirection through a companion global in __nv_managed_data__.
 // Explicit data transfers (cudaMemcpy) must be avoided for these
@@ -1380,16 +1371,6 @@ inline int GetNbOfCUDAManagedOrUnifiedSymbols(const A &expr) {
   return symbols.size();
 }
 
-template <typename A> inline int GetNbOfCUDAConstantSymbols(const A &expr) {
-  semantics::UnorderedSymbolSet symbols;
-  for (const Symbol &sym : CollectCudaSymbols(expr)) {
-    if (IsCUDAConstantSymbol(sym)) {
-      symbols.insert(sym);
-    }
-  }
-  return symbols.size();
-}
-
 // Check if any of the symbols part of the expression has a CUDA device
 // attribute.
 template <typename A> inline bool HasCUDADeviceAttrs(const A &expr) {
@@ -1402,19 +1383,11 @@ template <typename A, typename B>
 inline bool IsCUDADataTransfer(const A &lhs, const B &rhs) {
   int lhsNbManagedSymbols{GetNbOfCUDAManagedOrUnifiedSymbols(lhs)};
   int rhsNbManagedSymbols{GetNbOfCUDAManagedOrUnifiedSymbols(rhs)};
-  int rhsNbConstantSymbols{GetNbOfCUDAConstantSymbols(rhs)};
   int rhsNbSymbols{GetNbOfCUDADeviceSymbols(rhs)};
 
   if (HasNonAllocatableModuleCUDAManagedSymbols(lhs))
     return false;
 
-  // If only constant symbols are present on the rhs, and no device symbols on
-  // the lhs, then no data transfer is needed because the constant have a host
-  // value.
-  if (rhsNbConstantSymbols == rhsNbSymbols && !HasCUDADeviceAttrs(lhs)) {
-    return false;
-  }
-
   if (lhsNbManagedSymbols >= 1 && lhs.Rank() > 0 && rhsNbSymbols == 0 &&
       rhsNbManagedSymbols == 0 && (IsVariable(rhs) || IsConstantExpr(rhs))) {
     return true; // Managed arrays initialization is performed on the device.
diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp
index a60e36654ca34..82dcd1e795f49 100644
--- a/flang/lib/Evaluate/tools.cpp
+++ b/flang/lib/Evaluate/tools.cpp
@@ -1210,7 +1210,8 @@ bool IsCUDADeviceOnlySymbol(const Symbol &sym) {
   if (const auto *details =
           sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()) {
     return details->cudaDataAttr() &&
-        (*details->cudaDataAttr() == common::CUDADataAttr::Device);
+        (*details->cudaDataAttr() == common::CUDADataAttr::Device ||
+            *details->cudaDataAttr() == common::CUDADataAttr::Constant);
   }
   return false;
 }
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index f236e829072ee..a1006437485ca 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -3,8 +3,6 @@
 ! Test CUDA Fortran data transfer using assignment statements.
 
 module mod1
-  real, constant :: c1 = 1.0
-
   type :: t1
     integer :: i
   end type
@@ -497,7 +495,7 @@ subroutine sub25()
 end
 
 ! CHECK-LABEL: func.func @_QPsub25()
-! CHECK: fir.allocmem !fir.array<?xf64>, %{{.*}} {bindc_name = ".tmp", uniq_name = ""}
+! CHECK: fir.allocmem !fir.array<?xf64>, %15#1 {bindc_name = ".tmp", uniq_name = ""}
 ! CHECK: cuf.data_transfer %{{.*}} to %{{.*}} {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.box<!fir.array<?xf64>>
 ! CHECK: hlfir.assign %{{.*}} to %{{.*}} : f64, !fir.ref<f64>
 ! CHECK: fir.freemem %{{.*}} : !fir.heap<!fir.array<?xf64>>
@@ -726,13 +724,3 @@ subroutine sub41()
   
   lm(1:5) = a%m(1:5)
 end subroutine
-
-subroutine sub42()
-  use mod1
-  real :: a
-  a = c1 * c1
-end subroutine
-
-! CHECK-LABEL: func.func @_QPsub42()
-! CHECK-NOT: cuf.data_transfer
-! CHECK: hlfir.assign



More information about the flang-commits mailing list