[flang-commits] [flang] Revert "[flang][cuda] Do not emit data transfer for constant read on the rhs" (PR #205394)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Tue Jun 23 11:03:09 PDT 2026
https://github.com/clementval created https://github.com/llvm/llvm-project/pull/205394
Reverts llvm/llvm-project#205185
This is causing a couple of downstream tests to fail. Another approach is needed.
From c2b7927c6543349cc97781dcdf41ed12c2223be7 Mon Sep 17 00:00:00 2001
From: Valentin Clement (バレンタイン クレメン) <clementval at gmail.com>
Date: Tue, 23 Jun 2026 11:02:36 -0700
Subject: [PATCH] Revert "[flang][cuda] Do not emit data transfer for constant read on the rhs …"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This reverts commit 7781a0c196acda252a1db67936b76e96e9971f4c.
---
flang/include/flang/Evaluate/tools.h | 27 --------------------
flang/lib/Evaluate/tools.cpp | 3 ++-
flang/test/Lower/CUDA/cuda-data-transfer.cuf | 14 +---------
3 files changed, 3 insertions(+), 41 deletions(-)
diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h
index 08468f304914b..d2d0b69e6337d 100644
--- a/flang/include/flang/Evaluate/tools.h
+++ b/flang/include/flang/Evaluate/tools.h
@@ -1321,15 +1321,6 @@ inline bool IsCUDAManagedOrUnifiedSymbol(const Symbol &sym) {
return false;
}
-inline bool IsCUDAConstantSymbol(const Symbol &sym) {
- if (const auto *details =
- sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()) {
- return details->cudaDataAttr() &&
- (*details->cudaDataAttr() == common::CUDADataAttr::Constant);
- }
- return false;
-}
-
// Non-allocatable module-level managed/unified variables use pointer
// indirection through a companion global in __nv_managed_data__.
// Explicit data transfers (cudaMemcpy) must be avoided for these
@@ -1380,16 +1371,6 @@ inline int GetNbOfCUDAManagedOrUnifiedSymbols(const A &expr) {
return symbols.size();
}
-template <typename A> inline int GetNbOfCUDAConstantSymbols(const A &expr) {
- semantics::UnorderedSymbolSet symbols;
- for (const Symbol &sym : CollectCudaSymbols(expr)) {
- if (IsCUDAConstantSymbol(sym)) {
- symbols.insert(sym);
- }
- }
- return symbols.size();
-}
-
// Check if any of the symbols part of the expression has a CUDA device
// attribute.
template <typename A> inline bool HasCUDADeviceAttrs(const A &expr) {
@@ -1402,19 +1383,11 @@ template <typename A, typename B>
inline bool IsCUDADataTransfer(const A &lhs, const B &rhs) {
int lhsNbManagedSymbols{GetNbOfCUDAManagedOrUnifiedSymbols(lhs)};
int rhsNbManagedSymbols{GetNbOfCUDAManagedOrUnifiedSymbols(rhs)};
- int rhsNbConstantSymbols{GetNbOfCUDAConstantSymbols(rhs)};
int rhsNbSymbols{GetNbOfCUDADeviceSymbols(rhs)};
if (HasNonAllocatableModuleCUDAManagedSymbols(lhs))
return false;
- // If only constant symbols are present on the rhs, and no device symbols on
- // the lhs, then no data transfer is needed because the constant have a host
- // value.
- if (rhsNbConstantSymbols == rhsNbSymbols && !HasCUDADeviceAttrs(lhs)) {
- return false;
- }
-
if (lhsNbManagedSymbols >= 1 && lhs.Rank() > 0 && rhsNbSymbols == 0 &&
rhsNbManagedSymbols == 0 && (IsVariable(rhs) || IsConstantExpr(rhs))) {
return true; // Managed arrays initialization is performed on the device.
diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp
index a60e36654ca34..82dcd1e795f49 100644
--- a/flang/lib/Evaluate/tools.cpp
+++ b/flang/lib/Evaluate/tools.cpp
@@ -1210,7 +1210,8 @@ bool IsCUDADeviceOnlySymbol(const Symbol &sym) {
if (const auto *details =
sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()) {
return details->cudaDataAttr() &&
- (*details->cudaDataAttr() == common::CUDADataAttr::Device);
+ (*details->cudaDataAttr() == common::CUDADataAttr::Device ||
+ *details->cudaDataAttr() == common::CUDADataAttr::Constant);
}
return false;
}
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index f236e829072ee..a1006437485ca 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -3,8 +3,6 @@
! Test CUDA Fortran data transfer using assignment statements.
module mod1
- real, constant :: c1 = 1.0
-
type :: t1
integer :: i
end type
@@ -497,7 +495,7 @@ subroutine sub25()
end
! CHECK-LABEL: func.func @_QPsub25()
-! CHECK: fir.allocmem !fir.array<?xf64>, %{{.*}} {bindc_name = ".tmp", uniq_name = ""}
+! CHECK: fir.allocmem !fir.array<?xf64>, %15#1 {bindc_name = ".tmp", uniq_name = ""}
! CHECK: cuf.data_transfer %{{.*}} to %{{.*}} {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.box<!fir.array<?xf64>>
! CHECK: hlfir.assign %{{.*}} to %{{.*}} : f64, !fir.ref<f64>
! CHECK: fir.freemem %{{.*}} : !fir.heap<!fir.array<?xf64>>
@@ -726,13 +724,3 @@ subroutine sub41()
lm(1:5) = a%m(1:5)
end subroutine
-
-subroutine sub42()
- use mod1
- real :: a
- a = c1 * c1
-end subroutine
-
-! CHECK-LABEL: func.func @_QPsub42()
-! CHECK-NOT: cuf.data_transfer
-! CHECK: hlfir.assign
More information about the flang-commits
mailing list