[flang-commits] [flang] 8b06ef3 - [flang][cuda] Move function result assignment to managed variable on host (#163705)

via flang-commits flang-commits at lists.llvm.org
Thu Oct 16 10:01:15 PDT 2025


Author: Valentin Clement (バレンタイン クレメン)
Date: 2025-10-16T10:01:11-07:00
New Revision: 8b06ef3191093d60317fec430847cf92dfbce0f9

URL: https://github.com/llvm/llvm-project/commit/8b06ef3191093d60317fec430847cf92dfbce0f9
DIFF: https://github.com/llvm/llvm-project/commit/8b06ef3191093d60317fec430847cf92dfbce0f9.diff

LOG: [flang][cuda] Move function result assignment to managed variable on host (#163705)

- Update data transfer detection to let an assignment with a host rhs
and a managed lhs be performed on the host. This helps if the rhs is a
function result.
- Fix test `cuda-managed.cuf` introduced in eef4b5a. The test was not
checking for the implicit transfer but for the explicit transfer that was
part of the first loop.

Added: 
    

Modified: 
    flang/include/flang/Evaluate/tools.h
    flang/test/Lower/CUDA/cuda-data-transfer.cuf
    flang/test/Lower/CUDA/cuda-managed.cuf

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h
index 20a091918dc56..7f64d230f7348 100644
--- a/flang/include/flang/Evaluate/tools.h
+++ b/flang/include/flang/Evaluate/tools.h
@@ -1342,10 +1342,12 @@ inline bool IsCUDADataTransfer(const A &lhs, const B &rhs) {
   int rhsNbManagedSymbols = {GetNbOfCUDAManagedOrUnifiedSymbols(rhs)};
   int rhsNbSymbols{GetNbOfCUDADeviceSymbols(rhs)};
 
-  // Special case where only managed or unifed symbols are involved. This is
-  // performed on the host.
-  if (lhsNbManagedSymbols == 1 && rhsNbManagedSymbols == 1 &&
-      rhsNbSymbols == 1) {
+  // Special cases perforemd on the host:
+  // - Only managed or unifed symbols are involved on RHS and LHS.
+  // - LHS is managed or unified and the RHS is host only.
+  if ((lhsNbManagedSymbols == 1 && rhsNbManagedSymbols == 1 &&
+          rhsNbSymbols == 1) ||
+      (lhsNbManagedSymbols == 1 && rhsNbSymbols == 0)) {
     return false;
   }
   return HasCUDADeviceAttrs(lhs) || rhsNbSymbols > 0;

diff  --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index aef926b09a1ed..d1c8ecca3b019 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -15,6 +15,13 @@ module mod1
 
   real(kind=8), device, allocatable, dimension(:) :: p
 
+  interface
+    function __sum(a_d) result(res_h)
+      integer(4), managed, intent(in) :: a_d(:,:,:,:)
+      integer(4), allocatable, managed :: res_h(:,:,:)
+    end function
+  end interface
+
 contains
   function dev1(a)
     integer, device :: a(:)
@@ -522,3 +529,16 @@ end subroutine
 ! CHECK: hlfir.yield_element %[[CONV]] : f32
 ! CHECK: }
 ! CHECKL: hlfir.assign %[[ELE]] to %[[HD]]#0 : !hlfir.expr<10x20x30xf32>, !fir.ref<!fir.array<10x20x30xf32>>
+
+subroutine sub28(N1,N2,N3,N4)
+  use mod1
+  integer(4), managed :: a(N1,N2,N3,N4) 
+  integer(4), managed :: bres(N1,N2,N3)
+  bres = __sum(a)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub28
+! CHECK: fir.call @_QP__sum
+! CHECK-NOT: cuf.data_transfer
+! CHECK: hlfir.assign
+! CHECK-NOT: cuf.data_transfer

diff  --git a/flang/test/Lower/CUDA/cuda-managed.cuf b/flang/test/Lower/CUDA/cuda-managed.cuf
index e14bd849670b1..69c9ecfd355f7 100644
--- a/flang/test/Lower/CUDA/cuda-managed.cuf
+++ b/flang/test/Lower/CUDA/cuda-managed.cuf
@@ -1,18 +1,14 @@
 ! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
 
+! Check for implicit data transfer of managed variable
+
 subroutine testr2(N1,N2)
   real(4), managed :: ai4(N1,N2)
   real(4), allocatable :: bRefi4(:)
 
   integer :: i1, i2
 
-  do i2 = 1, N2
-    do i1 = 1, N1
-      ai4(i1,i2) = i1 + N1*(i2-1)
-    enddo
-  enddo
-
-  allocate(bRefi4 (N1))
+  allocate(bRefi4(N1))
   do i1 = 1, N1
     bRefi4(i1) = (ai4(i1,1)+ai4(i1,N2))*N2/2
   enddo
@@ -20,8 +16,8 @@ subroutine testr2(N1,N2)
 
 end subroutine
 
-!CHECK-LABEL: func.func @_QPtestr2
-!CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.array<?x?xf32>, %{{.*}}, %{{.*}} : index, index {bindc_name = "ai4", data_attr = #cuf.cuda<managed>, uniq_name = "_QFtestr2Eai4"} -> !fir.ref<!fir.array<?x?xf32>>
-!CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOC]](%{{.*}}) {data_attr = #cuf.cuda<managed>, uniq_name = "_QFtestr2Eai4"} : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>) -> (!fir.box<!fir.array<?x?xf32>>, !fir.ref<!fir.array<?x?xf32>>)
-!CHECK: %[[DEST:.*]] = hlfir.designate %[[DECLARE]]#0 (%{{.*}}, %{{.*}}) : (!fir.box<!fir.array<?x?xf32>>, i64, i64) -> !fir.ref<f32>
-!CHECK: cuf.data_transfer %{{.*}}#0 to %[[DEST]] {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<f32>, !fir.ref<f32>
+! CHECK-LABEL: func.func @_QPtestr2
+! CHECK:  %[[MANAGED:.*]]:2 = hlfir.declare %22(%23) {data_attr = #cuf.cuda<managed>, uniq_name = "_QFtestr2Eai4"} : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>) -> (!fir.box<!fir.array<?x?xf32>>, !fir.ref<!fir.array<?x?xf32>>)
+! CHECK: %[[TMP:.*]] = fir.allocmem !fir.array<?x?xf32>, %16, %21 {bindc_name = ".tmp", uniq_name = ""}
+! CHECK: %[[TMP_DECL:.*]]:2 = hlfir.declare %[[TMP]](%{{.*}}) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?x?xf32>>, !fir.shape<2>) -> (!fir.box<!fir.array<?x?xf32>>, !fir.heap<!fir.array<?x?xf32>>)
+! CHECK: cuf.data_transfer %[[MANAGED]]#1 to %[[TMP_DECL]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>


        


More information about the flang-commits mailing list