[flang-commits] [flang] [flang] [cuda] Fix CUDA implicit data transfer entity creation (PR #139414)

Zhen Wang via flang-commits flang-commits at lists.llvm.org
Sat May 10 16:53:57 PDT 2025


https://github.com/wangzpgi updated https://github.com/llvm/llvm-project/pull/139414

>From 38d7efcebee251a71c7bbcfb9de3429755c32210 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Sat, 10 May 2025 15:44:35 -0700
Subject: [PATCH 1/2] Fix CUDA implicit data transfer entity creation

---
 flang/lib/Lower/Bridge.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 43375e84f21fa..bfe8898ebff3d 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -4778,7 +4778,13 @@ class FirConverter : public Fortran::lower::AbstractConverter {
               nbDeviceResidentObject <= 1 &&
               "Only one reference to the device resident object is supported");
           auto addr = getSymbolAddress(sym);
-          hlfir::Entity entity{addr};
+          mlir::Value baseValue;
+          if (auto declareOp = llvm::dyn_cast<hlfir::DeclareOp>(addr.getDefiningOp()))
+            baseValue = declareOp.getBase();
+          else
+            baseValue = addr;
+
+          hlfir::Entity entity{baseValue};
           auto [temp, cleanup] =
               hlfir::createTempFromMold(loc, builder, entity);
           auto needCleanup = fir::getIntIfConstant(cleanup);

>From 3347add1c1b2f56e7adc06d4261dc1f0735eb207 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Sat, 10 May 2025 16:53:44 -0700
Subject: [PATCH 2/2] fix format; add test

---
 flang/lib/Lower/Bridge.cpp             |  3 ++-
 flang/test/Lower/CUDA/cuda-managed.cuf | 24 ++++++++++++++++++++++++
 2 files changed, 26 insertions(+), 1 deletion(-)
 create mode 100644 flang/test/Lower/CUDA/cuda-managed.cuf

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index bfe8898ebff3d..cf9a322680321 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -4779,7 +4779,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
               "Only one reference to the device resident object is supported");
           auto addr = getSymbolAddress(sym);
           mlir::Value baseValue;
-          if (auto declareOp = llvm::dyn_cast<hlfir::DeclareOp>(addr.getDefiningOp()))
+          if (auto declareOp =
+                  llvm::dyn_cast<hlfir::DeclareOp>(addr.getDefiningOp()))
             baseValue = declareOp.getBase();
           else
             baseValue = addr;
diff --git a/flang/test/Lower/CUDA/cuda-managed.cuf b/flang/test/Lower/CUDA/cuda-managed.cuf
new file mode 100644
index 0000000000000..618a57da53a25
--- /dev/null
+++ b/flang/test/Lower/CUDA/cuda-managed.cuf
@@ -0,0 +1,24 @@
+! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
+
+subroutine testr2(N1,N2)  ! lowering test input; N1, N2 have no declaration here, so they are implicitly typed
+  real(4), managed :: ai4(N1,N2)  ! managed array dimensioned by the two dummy arguments
+  real(4), allocatable :: bRefi4(:)  ! allocatable array declared without a CUDA data attribute
+  ! i1 indexes the first dimension of ai4, i2 the second.
+  integer :: i1, i2
+  ! Fill ai4; the inner loop runs over the first (fastest-varying) index.
+  do i2 = 1, N2
+    do i1 = 1, N1
+      ai4(i1,i2) = i1 + N1*(i2-1)
+    enddo
+  enddo
+  ! Combine the first and last columns of ai4 into bRefi4; each statement reads two ai4 elements.
+  allocate(bRefi4 (N1))
+  do i1 = 1, N1
+    bRefi4(i1) = (ai4(i1,1)+ai4(i1,N2))*N2/2  ! the sum is real(4), so *N2 and /2 are done in real arithmetic
+  enddo
+  deallocate(bRefi4)  ! NOTE(review): presumably the loop above is what exercises the fixed lowering path - confirm
+
+end subroutine
+
+!CHECK-LABEL: func.func @_QPtestr2
+!CHECK: %{{.*}} = cuf.alloc !fir.array<?x?xf32>, %{{.*}}, %{{.*}} : index, index {bindc_name = "ai4", data_attr = #cuf.cuda<managed>, uniq_name = "_QFtestr2Eai4"} -> !fir.ref<!fir.array<?x?xf32>>



More information about the flang-commits mailing list