[flang-commits] [flang] [flang][cuda] Extent detection of data transfer with conversion (PR #163852)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Thu Oct 16 13:03:06 PDT 2025
Valentin Clement (バレンタイン クレメン)
In-Reply-To: <llvm.org/llvm/llvm-project/pull/163852 at github.com>
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/163852
>From 383325fc80a41be4c0fec1d4ff9cac7f7e3e0303 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Thu, 16 Oct 2025 12:44:21 -0700
Subject: [PATCH 1/2] [flang][cuda] Extent detection of data transfer with
conversion
---
flang/include/flang/Lower/CUDA.h | 8 +++++-
flang/lib/Lower/Bridge.cpp | 5 +---
flang/lib/Lower/CUDA.cpp | 27 +++++++++++++++-----
flang/test/Lower/CUDA/cuda-data-transfer.cuf | 17 ++++++++++++
4 files changed, 46 insertions(+), 11 deletions(-)
diff --git a/flang/include/flang/Lower/CUDA.h b/flang/include/flang/Lower/CUDA.h
index ab9dde8ad5198..971527cba1de8 100644
--- a/flang/include/flang/Lower/CUDA.h
+++ b/flang/include/flang/Lower/CUDA.h
@@ -27,6 +27,10 @@ class Location;
class MLIRContext;
} // namespace mlir
+namespace hlfir {
+class ElementalOp;
+} // namespace hlfir
+
namespace Fortran::lower {
class AbstractConverter;
@@ -58,7 +62,9 @@ cuf::DataAttributeAttr
translateSymbolCUFDataAttribute(mlir::MLIRContext *mlirContext,
const Fortran::semantics::Symbol &sym);
-bool isTransferWithConversion(mlir::Value rhs);
+/// Check if the rhs has an implicit conversion. Return the elemental op if
+/// there is a conversion. Return null otherwise.
+hlfir::ElementalOp isTransferWithConversion(mlir::Value rhs);
} // end namespace Fortran::lower
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 68adf346fe8c0..525fb0e9997b7 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -4987,11 +4987,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
// host = device
if (!lhsIsDevice && rhsIsDevice) {
- if (Fortran::lower::isTransferWithConversion(rhs)) {
+ if (auto elementalOp = Fortran::lower::isTransferWithConversion(rhs)) {
mlir::OpBuilder::InsertionGuard insertionGuard(builder);
- auto elementalOp =
- mlir::dyn_cast<hlfir::ElementalOp>(rhs.getDefiningOp());
- assert(elementalOp && "expect elemental op");
auto designateOp =
*elementalOp.getBody()->getOps<hlfir::DesignateOp>().begin();
builder.setInsertionPoint(elementalOp);
diff --git a/flang/lib/Lower/CUDA.cpp b/flang/lib/Lower/CUDA.cpp
index bb4bdee78f97d..9501b0ec60002 100644
--- a/flang/lib/Lower/CUDA.cpp
+++ b/flang/lib/Lower/CUDA.cpp
@@ -68,11 +68,26 @@ cuf::DataAttributeAttr Fortran::lower::translateSymbolCUFDataAttribute(
return cuf::getDataAttribute(mlirContext, cudaAttr);
}
-bool Fortran::lower::isTransferWithConversion(mlir::Value rhs) {
+hlfir::ElementalOp Fortran::lower::isTransferWithConversion(mlir::Value rhs) {
+ auto isConversionElementalOp = [](hlfir::ElementalOp elOp) {
+ return llvm::hasSingleElement(
+ elOp.getBody()->getOps<hlfir::DesignateOp>()) &&
+ llvm::hasSingleElement(elOp.getBody()->getOps<fir::LoadOp>()) == 1 &&
+ llvm::hasSingleElement(elOp.getBody()->getOps<fir::ConvertOp>()) ==
+ 1;
+ };
+ if (auto declOp = mlir::dyn_cast<hlfir::DeclareOp>(rhs.getDefiningOp())) {
+ if (!declOp.getMemref().getDefiningOp())
+ return {};
+ if (auto associateOp = mlir::dyn_cast<hlfir::AssociateOp>(
+ declOp.getMemref().getDefiningOp()))
+ if (auto elOp = mlir::dyn_cast<hlfir::ElementalOp>(
+ associateOp.getSource().getDefiningOp()))
+ if (isConversionElementalOp(elOp))
+ return elOp;
+ }
if (auto elOp = mlir::dyn_cast<hlfir::ElementalOp>(rhs.getDefiningOp()))
- if (llvm::hasSingleElement(elOp.getBody()->getOps<hlfir::DesignateOp>()) &&
- llvm::hasSingleElement(elOp.getBody()->getOps<fir::LoadOp>()) == 1 &&
- llvm::hasSingleElement(elOp.getBody()->getOps<fir::ConvertOp>()) == 1)
- return true;
- return false;
+ if (isConversionElementalOp(elOp))
+ return elOp;
+ return {};
}
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index d1c8ecca3b019..8772497bd5420 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -542,3 +542,20 @@ end subroutine
! CHECK-NOT: cuf.data_transfer
! CHECK: hlfir.assign
! CHECK-NOT: cuf.data_transfer
+
+! Data transfer with conversion with more complex elemental
+! Check that the data transfer is palce
+subroutine sub29()
+ real(2), device, allocatable :: a(:)
+ real(4), allocatable :: ha(:)
+ allocate(a(10))
+ allocate(ha(10))
+ ha = a
+ deallocate(a)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub29()
+! CHECK: %[[TMP:.*]] = fir.allocmem !fir.array<?xf16>, %24#1 {bindc_name = ".tmp", uniq_name = ""}
+! CHECK: %[[TMP_BUFFER:.*]]:2 = hlfir.declare %[[TMP]](%{{.*}}) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xf16>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf16>>, !fir.heap<!fir.array<?xf16>>)
+! CHECK: cuf.data_transfer %{{.*}} to %[[TMP_BUFFER]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.box<!fir.heap<!fir.array<?xf16>>>, !fir.box<!fir.array<?xf16>>
+! CHECK: hlfir.elemental
>From 67d130bb4dc2b4e39efcb1c83970e531f78dac91 Mon Sep 17 00:00:00 2001
From: Valentin Clement (バレンタイン クレメン) <clementval at gmail.com>
Date: Thu, 16 Oct 2025 13:02:58 -0700
Subject: [PATCH 2/2] Update flang/test/Lower/CUDA/cuda-data-transfer.cuf
---
flang/test/Lower/CUDA/cuda-data-transfer.cuf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index 8772497bd5420..b0b8d09c0c55b 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -544,7 +544,7 @@ end subroutine
! CHECK-NOT: cuf.data_transfer
! Data transfer with conversion with more complex elemental
-! Check that the data transfer is palce
+! Check that the data transfer is placed before the elemental op.
subroutine sub29()
real(2), device, allocatable :: a(:)
real(4), allocatable :: ha(:)
More information about the flang-commits
mailing list