[flang-commits] [flang] d1daf3e - [flang][cuda] Do not repack CUDA device dummy arguments (#195950)

via flang-commits flang-commits at lists.llvm.org
Thu May 7 11:12:42 PDT 2026


Author: khaki3
Date: 2026-05-07T11:12:37-07:00
New Revision: d1daf3eaf6ca1ec308b64381d10447c1ba333b8e

URL: https://github.com/llvm/llvm-project/commit/d1daf3eaf6ca1ec308b64381d10447c1ba333b8e
DIFF: https://github.com/llvm/llvm-project/commit/d1daf3eaf6ca1ec308b64381d10447c1ba333b8e.diff

LOG: [flang][cuda] Do not repack CUDA device dummy arguments (#195950)

-frepack-arrays (implied by -Ofast) was inserting fir.pack_array /
fir.unpack_array for assumed-shape dummy arguments with CUDA data
attributes (device, managed, etc.). The repacking allocates a host-side
temporary and copies the descriptor, but the data lives in device
memory. When the CUF kernel subsequently receives the host descriptor
pointer, accessing it from the GPU triggers cudaErrorIllegalAddress.

Skip repacking in needsRepack() for any symbol that carries a CUDA data
attribute.

Added: 
    flang/test/Lower/CUDA/cuda-repack-arrays.cuf

Modified: 
    flang/lib/Lower/ConvertVariable.cpp

Removed: 
    


################################################################################
diff  --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index 59abdb92e33ba..05b3fb4a5d370 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -1113,6 +1113,13 @@ static bool needsRepack(Fortran::lower::AbstractConverter &converter,
                     Fortran::semantics::Attr::VOLATILE}))
     return false;
 
+  // CUDA device/managed/unified/shared/pinned arrays must not be repacked
+  // on the host. The repacking would allocate a host-side temporary and
+  // copy the descriptor, but the data lives in device memory, causing
+  // illegal address errors when the kernel tries to access it.
+  if (auto cudaAttr = Fortran::semantics::GetCUDADataAttr(&sym))
+    return false;
+
   return true;
 }
 

diff  --git a/flang/test/Lower/CUDA/cuda-repack-arrays.cuf b/flang/test/Lower/CUDA/cuda-repack-arrays.cuf
new file mode 100644
index 0000000000000..db14b7543762f
--- /dev/null
+++ b/flang/test/Lower/CUDA/cuda-repack-arrays.cuf
@@ -0,0 +1,39 @@
+! RUN: bbc -emit-hlfir -fcuda -frepack-arrays %s -o - | FileCheck %s
+
+! Verify that -frepack-arrays does not generate fir.pack_array for
+! CUDA device dummy arguments.  Repacking a device array on the host
+! would create a host-side temporary whose address is invalid on the
+! GPU, leading to cudaErrorIllegalAddress at kernel launch.
+
+subroutine sub1(db, n1, n2)
+  real, device :: db(:,:)
+  integer :: n1, n2
+  db(1,1) = 1.0
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub1(
+! CHECK-SAME:    %[[ARG0:.*]]: !fir.box<!fir.array<?x?xf32>> {cuf.data_attr = #cuf.cuda<device>
+! CHECK-NOT:     fir.pack_array
+! CHECK:         return
+
+subroutine sub2(x, n)
+  real :: x(:)
+  integer :: n
+  x(1) = 1.0
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub2(
+! CHECK:         fir.pack_array
+! CHECK:         fir.unpack_array
+! CHECK:         return
+
+subroutine sub3(db, n)
+  real, managed :: db(:)
+  integer :: n
+  db(1) = 1.0
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub3(
+! CHECK-SAME:    %[[ARG0:.*]]: !fir.box<!fir.array<?xf32>> {cuf.data_attr = #cuf.cuda<managed>
+! CHECK-NOT:     fir.pack_array
+! CHECK:         return


        


More information about the flang-commits mailing list