[flang-commits] [flang] [llvm] [flang][cuda] Do not use double descriptor for unified allocatables (PR #190713)

Mon Apr 6 17:09:11 PDT 2026

https://github.com/wangzpgi created https://github.com/llvm/llvm-project/pull/190713

Unified memory relies on HMM/ATS and lives in host system memory, so a unified allocatable does not need device-side descriptor synchronization via cudaGetSymbolAddress. Treat the unified data attribute the same as pinned in hasDoubleDescriptor (i.e. return false, so no double descriptor is used) to avoid a cudaErrorInvalidSymbol at runtime.

>From 5cdbab87fbee50739c37f34cf617a4ceefaf3558 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Mon, 6 Apr 2026 17:05:46 -0700
Subject: [PATCH 1/2] Do not use double descriptor for unified allocatables

---
 flang/lib/Lower/CUDA.cpp                   |  3 ++-
 flang/test/Lower/CUDA/cuda-allocatable.cuf | 10 ++++++++++
 globals.mod                                | 10 ++++++++++
 3 files changed, 22 insertions(+), 1 deletion(-)
 create mode 100644 globals.mod

diff --git a/flang/lib/Lower/CUDA.cpp b/flang/lib/Lower/CUDA.cpp
index d8e2d829f9adf..fc7261f9a01e5 100644
--- a/flang/lib/Lower/CUDA.cpp
+++ b/flang/lib/Lower/CUDA.cpp
@@ -120,7 +120,8 @@ bool Fortran::lower::hasDoubleDescriptor(mlir::Value addr) {
     if (mlir::isa_and_nonnull<fir::AddrOfOp>(
             declareOp.getMemref().getDefiningOp())) {
       if (declareOp.getDataAttr() &&
-          *declareOp.getDataAttr() == cuf::DataAttribute::Pinned)
+          (*declareOp.getDataAttr() == cuf::DataAttribute::Pinned ||
+           *declareOp.getDataAttr() == cuf::DataAttribute::Unified))
         return false;
       return true;
     }
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 52303d126b8dc..39179c90a2418 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -262,6 +262,16 @@ end subroutine
 ! CHECK: cuf.deallocate %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<managed>, hasDoubleDescriptor} -> i32
 ! CHECK: cuf.deallocate %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<pinned>} -> i32
 
+subroutine unified_allocate()
+  integer, allocatable, unified, save :: a_unified(:)
+  allocate(a_unified(10))
+  deallocate(a_unified)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPunified_allocate()
+! CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<unified>} -> i32
+! CHECK: cuf.deallocate %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<unified>} -> i32
+
 attributes(global) subroutine from_device_source()
   real, device, allocatable :: a(:)
   real, allocatable :: b(:)
diff --git a/globals.mod b/globals.mod
new file mode 100644
index 0000000000000..baa87c9730a7e
--- /dev/null
+++ b/globals.mod
@@ -0,0 +1,10 @@
+!mod$ v1 sum:dc22d009f1dc3500
+module globals
+real(4),allocatable,device::a_device(:)
+real(4),allocatable,managed::a_managed(:)
+real(4),allocatable,pinned::a_pinned(:)
+type::t1
+integer(4)::a
+real(4),allocatable,device::b(:)
+end type
+end

>From ff579183689850409c415b028cddb09646c0c28b Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Mon, 6 Apr 2026 17:08:49 -0700
Subject: [PATCH 2/2] remove .mod file

---
 globals.mod | 10 ----------
 1 file changed, 10 deletions(-)
 delete mode 100644 globals.mod

diff --git a/globals.mod b/globals.mod
deleted file mode 100644
index baa87c9730a7e..0000000000000
--- a/globals.mod
+++ /dev/null
@@ -1,10 +0,0 @@
-!mod$ v1 sum:dc22d009f1dc3500
-module globals
-real(4),allocatable,device::a_device(:)
-real(4),allocatable,managed::a_managed(:)
-real(4),allocatable,pinned::a_pinned(:)
-type::t1
-integer(4)::a
-real(4),allocatable,device::b(:)
-end type
-end