[flang-commits] [flang] [llvm] [flang][cuda] Do not use double descriptor for unified allocatables (PR #190713)
Zhen Wang via flang-commits
flang-commits at lists.llvm.org
Mon Apr 6 17:09:11 PDT 2026
https://github.com/wangzpgi created https://github.com/llvm/llvm-project/pull/190713
Unified memory relies on HMM/ATS and lives in host system memory, so an allocatable with the unified attribute does not need its descriptor synchronized to the device via cudaGetSymbolAddress. Treat the unified attribute the same as pinned in hasDoubleDescriptor (i.e. return false) to avoid a cudaErrorInvalidSymbol error at runtime.
>From 5cdbab87fbee50739c37f34cf617a4ceefaf3558 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Mon, 6 Apr 2026 17:05:46 -0700
Subject: [PATCH 1/2] Do not use double descriptor for unified allocatables
---
flang/lib/Lower/CUDA.cpp | 3 ++-
flang/test/Lower/CUDA/cuda-allocatable.cuf | 10 ++++++++++
globals.mod | 10 ++++++++++
3 files changed, 22 insertions(+), 1 deletion(-)
create mode 100644 globals.mod
diff --git a/flang/lib/Lower/CUDA.cpp b/flang/lib/Lower/CUDA.cpp
index d8e2d829f9adf..fc7261f9a01e5 100644
--- a/flang/lib/Lower/CUDA.cpp
+++ b/flang/lib/Lower/CUDA.cpp
@@ -120,7 +120,8 @@ bool Fortran::lower::hasDoubleDescriptor(mlir::Value addr) {
if (mlir::isa_and_nonnull<fir::AddrOfOp>(
declareOp.getMemref().getDefiningOp())) {
if (declareOp.getDataAttr() &&
- *declareOp.getDataAttr() == cuf::DataAttribute::Pinned)
+ (*declareOp.getDataAttr() == cuf::DataAttribute::Pinned ||
+ *declareOp.getDataAttr() == cuf::DataAttribute::Unified))
return false;
return true;
}
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 52303d126b8dc..39179c90a2418 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -262,6 +262,16 @@ end subroutine
! CHECK: cuf.deallocate %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<managed>, hasDoubleDescriptor} -> i32
! CHECK: cuf.deallocate %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<pinned>} -> i32
+subroutine unified_allocate()
+ integer, allocatable, unified, save :: a_unified(:)
+ allocate(a_unified(10))
+ deallocate(a_unified)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPunified_allocate()
+! CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<unified>} -> i32
+! CHECK: cuf.deallocate %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<unified>} -> i32
+
attributes(global) subroutine from_device_source()
real, device, allocatable :: a(:)
real, allocatable :: b(:)
diff --git a/globals.mod b/globals.mod
new file mode 100644
index 0000000000000..baa87c9730a7e
--- /dev/null
+++ b/globals.mod
@@ -0,0 +1,10 @@
+!mod$ v1 sum:dc22d009f1dc3500
+module globals
+real(4),allocatable,device::a_device(:)
+real(4),allocatable,managed::a_managed(:)
+real(4),allocatable,pinned::a_pinned(:)
+type::t1
+integer(4)::a
+real(4),allocatable,device::b(:)
+end type
+end
>From ff579183689850409c415b028cddb09646c0c28b Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Mon, 6 Apr 2026 17:08:49 -0700
Subject: [PATCH 2/2] remove .mod file
---
globals.mod | 10 ----------
1 file changed, 10 deletions(-)
delete mode 100644 globals.mod
diff --git a/globals.mod b/globals.mod
deleted file mode 100644
index baa87c9730a7e..0000000000000
--- a/globals.mod
+++ /dev/null
@@ -1,10 +0,0 @@
-!mod$ v1 sum:dc22d009f1dc3500
-module globals
-real(4),allocatable,device::a_device(:)
-real(4),allocatable,managed::a_managed(:)
-real(4),allocatable,pinned::a_pinned(:)
-type::t1
-integer(4)::a
-real(4),allocatable,device::b(:)
-end type
-end
More information about the flang-commits
mailing list