[flang-commits] [flang] [flang][CUDA] Apply implicit managed attribute when `-gpu=mem:managed` is used. (PR #175648)
Zhen Wang via flang-commits
flang-commits at lists.llvm.org
Mon Jan 12 17:49:10 PST 2026
https://github.com/wangzpgi updated https://github.com/llvm/llvm-project/pull/175648
>From e070ba023bea4c1160e3b583b3ff7d581b86900f Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Tue, 6 Jan 2026 13:38:51 -0800
Subject: [PATCH 1/5] add support for -gpu=managed
---
flang/lib/Semantics/resolve-names.cpp | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index 527be8645ff81..51389501a87a2 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -3517,6 +3517,14 @@ void ScopeHandler::SetCUDADataAttr(SourceName source, Symbol &symbol,
"'%s' is not an object and may not have a CUDA data attribute"_err_en_US,
symbol.name());
}
+ } else if (context().languageFeatures().IsEnabled(
+ common::LanguageFeature::CudaManaged)) {
+ // -gpu=managed: implicitly treat allocatable arrays as managed
+ if (auto *object{symbol.detailsIf<ObjectEntityDetails>()}) {
+ if (IsAllocatable(symbol) && !object->cudaDataAttr()) {
+ object->set_cudaDataAttr(common::CUDADataAttr::Managed);
+ }
+ }
}
}
>From cf184d3b0f843f10f0659d9fdf92d3aaeaa5d07d Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Mon, 12 Jan 2026 09:15:12 -0800
Subject: [PATCH 2/5] add test
---
flang/test/Lower/CUDA/cuda-gpu-managed.cuf | 186 +++++++++++++++++++++
1 file changed, 186 insertions(+)
create mode 100644 flang/test/Lower/CUDA/cuda-gpu-managed.cuf
diff --git a/flang/test/Lower/CUDA/cuda-gpu-managed.cuf b/flang/test/Lower/CUDA/cuda-gpu-managed.cuf
new file mode 100644
index 0000000000000..ca974c25f362c
--- /dev/null
+++ b/flang/test/Lower/CUDA/cuda-gpu-managed.cuf
@@ -0,0 +1,186 @@
+! RUN: bbc -emit-hlfir -fcuda -gpu=managed %s -o - | FileCheck %s
+
+! Test -gpu=managed flag: allocatable arrays without explicit CUDA attributes
+! should be implicitly treated as managed.
+
+! -----------------------------------------------------------------------------
+! Test 1: Basic allocatable without explicit attribute becomes managed
+! -----------------------------------------------------------------------------
+subroutine test_implicit_managed()
+ real, allocatable :: a(:)
+ allocate(a(100))
+ a = 1.0
+ deallocate(a)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_implicit_managed()
+! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<managed>, uniq_name = "_QFtest_implicit_managedEa"}
+! CHECK: fir.embox {{.*}} {allocator_idx = 3 : i32}
+! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_implicit_managedEa"}
+! CHECK: cuf.allocate %[[BOX_DECL]]#0 : {{.*}} {data_attr = #cuf.cuda<managed>}
+! CHECK: cuf.deallocate %[[BOX_DECL]]#0 : {{.*}} {data_attr = #cuf.cuda<managed>}
+! CHECK: cuf.free %[[BOX_DECL]]#0 : {{.*}} {data_attr = #cuf.cuda<managed>}
+
+! -----------------------------------------------------------------------------
+! Test 2: Explicit device attribute is preserved (not overridden to managed)
+! -----------------------------------------------------------------------------
+subroutine test_explicit_device()
+ real, allocatable, device :: d(:)
+ allocate(d(100))
+ deallocate(d)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_explicit_device()
+! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "d", data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_explicit_deviceEd"}
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
+! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_explicit_deviceEd"}
+! CHECK: cuf.allocate %[[BOX_DECL]]#0 : {{.*}} {data_attr = #cuf.cuda<device>}
+
+! -----------------------------------------------------------------------------
+! Test 3: Explicit pinned attribute is preserved
+! -----------------------------------------------------------------------------
+subroutine test_explicit_pinned()
+ real, allocatable, pinned :: p(:)
+ allocate(p(100))
+ deallocate(p)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_explicit_pinned()
+! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "p", data_attr = #cuf.cuda<pinned>, uniq_name = "_QFtest_explicit_pinnedEp"}
+! CHECK: fir.embox {{.*}} {allocator_idx = 1 : i32}
+! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_explicit_pinnedEp"}
+
+! -----------------------------------------------------------------------------
+! Test 4: Explicit managed attribute is preserved (redundant but valid)
+! -----------------------------------------------------------------------------
+subroutine test_explicit_managed()
+ real, allocatable, managed :: m(:)
+ allocate(m(100))
+ deallocate(m)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_explicit_managed()
+! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "m", data_attr = #cuf.cuda<managed>, uniq_name = "_QFtest_explicit_managedEm"}
+! CHECK: fir.embox {{.*}} {allocator_idx = 3 : i32}
+! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_explicit_managedEm"}
+
+! -----------------------------------------------------------------------------
+! Test 5: Pointer variables are NOT affected by -gpu=managed
+! -----------------------------------------------------------------------------
+subroutine test_pointer_not_managed()
+ real, pointer :: ptr(:)
+ allocate(ptr(100))
+ ptr = 1.0
+ deallocate(ptr)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_pointer_not_managed()
+! CHECK: %[[BOX:.*]] = fir.alloca !fir.box<!fir.ptr<!fir.array<?xf32>>> {bindc_name = "ptr", uniq_name = "_QFtest_pointer_not_managedEptr"}
+! CHECK-NOT: data_attr = #cuf.cuda<managed>
+! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_pointer_not_managedEptr"}
+! CHECK: fir.call @_FortranAPointerAllocate
+
+! -----------------------------------------------------------------------------
+! Test 6: Multiple allocatables - mix of implicit and explicit
+! -----------------------------------------------------------------------------
+subroutine test_mixed_allocatables()
+ real, allocatable :: a(:) ! Should become managed
+ real, allocatable, device :: d(:) ! Should stay device
+ real, allocatable, pinned :: p(:) ! Should stay pinned
+ real, allocatable, managed :: m(:) ! Should stay managed (explicit)
+
+ allocate(a(10), d(10), p(10), m(10))
+ deallocate(a, d, p, m)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_mixed_allocatables()
+! CHECK: cuf.alloc {{.*}} {bindc_name = "a", data_attr = #cuf.cuda<managed>, uniq_name = "_QFtest_mixed_allocatablesEa"}
+! CHECK: cuf.alloc {{.*}} {bindc_name = "d", data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_mixed_allocatablesEd"}
+! CHECK: cuf.alloc {{.*}} {bindc_name = "m", data_attr = #cuf.cuda<managed>, uniq_name = "_QFtest_mixed_allocatablesEm"}
+! CHECK: cuf.alloc {{.*}} {bindc_name = "p", data_attr = #cuf.cuda<pinned>, uniq_name = "_QFtest_mixed_allocatablesEp"}
+
+! -----------------------------------------------------------------------------
+! Test 7: Allocatable in derived type - component allocation uses managed
+! -----------------------------------------------------------------------------
+module mod_derived
+ type :: container
+ real, allocatable :: data(:)
+ end type
+end module
+
+subroutine test_derived_type()
+ use mod_derived
+ type(container) :: c
+ allocate(c%data(100))
+ deallocate(c%data)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_derived_type()
+! CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<managed>}
+! CHECK: cuf.deallocate %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<managed>}
+
+! -----------------------------------------------------------------------------
+! Test 8: Multi-dimensional allocatable array
+! -----------------------------------------------------------------------------
+subroutine test_multidim()
+ real, allocatable :: arr(:,:,:)
+ allocate(arr(10,20,30))
+ arr = 0.0
+ deallocate(arr)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_multidim()
+! CHECK: cuf.alloc {{.*}} {bindc_name = "arr", data_attr = #cuf.cuda<managed>, uniq_name = "_QFtest_multidimEarr"}
+! CHECK: fir.embox {{.*}} {allocator_idx = 3 : i32}
+
+! -----------------------------------------------------------------------------
+! Test 9: Explicit unified attribute is preserved (not overridden to managed)
+! -----------------------------------------------------------------------------
+subroutine test_explicit_unified()
+ real, allocatable, unified :: u(:)
+ allocate(u(100))
+ deallocate(u)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_explicit_unified()
+! CHECK: cuf.alloc {{.*}} {bindc_name = "u", data_attr = #cuf.cuda<unified>, uniq_name = "_QFtest_explicit_unifiedEu"}
+! CHECK: hlfir.declare {{.*}} {data_attr = #cuf.cuda<unified>, fortran_attrs = #fir.var_attrs<allocatable>
+
+! -----------------------------------------------------------------------------
+! Test 10: Dummy arguments - allocatable dummy without explicit attribute
+! -----------------------------------------------------------------------------
+subroutine test_dummy_allocatable(arr)
+ real, allocatable, intent(inout) :: arr(:)
+ if (.not. allocated(arr)) allocate(arr(100))
+ arr = 1.0
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_dummy_allocatable(
+! CHECK-SAME: %{{.*}}: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuf.data_attr = #cuf.cuda<managed>, fir.bindc_name = "arr"})
+! CHECK: hlfir.declare {{.*}} {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable, intent_inout>
+
+! -----------------------------------------------------------------------------
+! Test 11: Module variables - allocatable module variable becomes managed
+! -----------------------------------------------------------------------------
+module mod_globals
+ real, allocatable :: global_arr(:)
+ real, allocatable, device :: global_device(:)
+end module
+
+! CHECK: fir.global @_QMmod_globalsEglobal_arr {data_attr = #cuf.cuda<managed>} : !fir.box<!fir.heap<!fir.array<?xf32>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 3 : i32}
+
+! CHECK: fir.global @_QMmod_globalsEglobal_device {data_attr = #cuf.cuda<device>} : !fir.box<!fir.heap<!fir.array<?xf32>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
+
+subroutine test_module_var()
+ use mod_globals
+ allocate(global_arr(50))
+ allocate(global_device(50))
+ deallocate(global_arr)
+ deallocate(global_device)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_module_var()
+! CHECK: cuf.allocate {{.*}} {data_attr = #cuf.cuda<managed>, hasDoubleDescriptor}
+! CHECK: cuf.allocate {{.*}} {data_attr = #cuf.cuda<device>, hasDoubleDescriptor}
>From 7f446facd4f43405c55a112c7d58d577fb8e1f22 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Mon, 12 Jan 2026 11:07:33 -0800
Subject: [PATCH 3/5] move the implicit managed logic to run AFTER all explicit CUDA attributes have been processed
---
flang/lib/Semantics/resolve-names.cpp | 20 ++++++++------
flang/test/Lower/CUDA/cuda-gpu-managed.cuf | 32 ++++------------------
flang/tools/bbc/bbc.cpp | 4 +--
3 files changed, 20 insertions(+), 36 deletions(-)
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index 51389501a87a2..bf0dbcd619841 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -3517,15 +3517,8 @@ void ScopeHandler::SetCUDADataAttr(SourceName source, Symbol &symbol,
"'%s' is not an object and may not have a CUDA data attribute"_err_en_US,
symbol.name());
}
- } else if (context().languageFeatures().IsEnabled(
- common::LanguageFeature::CudaManaged)) {
- // -gpu=managed: implicitly treat allocatable arrays as managed
- if (auto *object{symbol.detailsIf<ObjectEntityDetails>()}) {
- if (IsAllocatable(symbol) && !object->cudaDataAttr()) {
- object->set_cudaDataAttr(common::CUDADataAttr::Managed);
- }
- }
}
+
}
// ModuleVisitor implementation
@@ -9918,6 +9911,17 @@ void ResolveNamesVisitor::FinishSpecificationPart(
SetBindNameOn(symbol);
}
}
+ // -gpu=managed: implicitly treat allocatable arrays as managed
+ // This is done here after all explicit CUDA attributes have been processed.
+ if (context().languageFeatures().IsEnabled(
+ common::LanguageFeature::CudaManaged)) {
+ if (auto *object{symbol.detailsIf<ObjectEntityDetails>()}) {
+ if (IsAllocatable(symbol) && !IsPointer(symbol) &&
+ !object->cudaDataAttr()) {
+ object->set_cudaDataAttr(common::CUDADataAttr::Managed);
+ }
+ }
+ }
}
currScope().InstantiateDerivedTypes();
for (const auto &decl : decls) {
diff --git a/flang/test/Lower/CUDA/cuda-gpu-managed.cuf b/flang/test/Lower/CUDA/cuda-gpu-managed.cuf
index ca974c25f362c..b015b470b921b 100644
--- a/flang/test/Lower/CUDA/cuda-gpu-managed.cuf
+++ b/flang/test/Lower/CUDA/cuda-gpu-managed.cuf
@@ -1,4 +1,4 @@
-! RUN: bbc -emit-hlfir -fcuda -gpu=managed %s -o - | FileCheck %s
+! RUN: bbc -emit-hlfir -fcuda -gpu=mem:managed %s -o - | FileCheck %s
! Test -gpu=managed flag: allocatable arrays without explicit CUDA attributes
! should be implicitly treated as managed.
@@ -88,7 +88,7 @@ subroutine test_mixed_allocatables()
real, allocatable, device :: d(:) ! Should stay device
real, allocatable, pinned :: p(:) ! Should stay pinned
real, allocatable, managed :: m(:) ! Should stay managed (explicit)
-
+
allocate(a(10), d(10), p(10), m(10))
deallocate(a, d, p, m)
end subroutine
@@ -100,27 +100,7 @@ end subroutine
! CHECK: cuf.alloc {{.*}} {bindc_name = "p", data_attr = #cuf.cuda<pinned>, uniq_name = "_QFtest_mixed_allocatablesEp"}
! -----------------------------------------------------------------------------
-! Test 7: Allocatable in derived type - component allocation uses managed
-! -----------------------------------------------------------------------------
-module mod_derived
- type :: container
- real, allocatable :: data(:)
- end type
-end module
-
-subroutine test_derived_type()
- use mod_derived
- type(container) :: c
- allocate(c%data(100))
- deallocate(c%data)
-end subroutine
-
-! CHECK-LABEL: func.func @_QPtest_derived_type()
-! CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<managed>}
-! CHECK: cuf.deallocate %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<managed>}
-
-! -----------------------------------------------------------------------------
-! Test 8: Multi-dimensional allocatable array
+! Test 7: Multi-dimensional allocatable array
! -----------------------------------------------------------------------------
subroutine test_multidim()
real, allocatable :: arr(:,:,:)
@@ -134,7 +114,7 @@ end subroutine
! CHECK: fir.embox {{.*}} {allocator_idx = 3 : i32}
! -----------------------------------------------------------------------------
-! Test 9: Explicit unified attribute is preserved (not overridden to managed)
+! Test 8: Explicit unified attribute is preserved (not overridden to managed)
! -----------------------------------------------------------------------------
subroutine test_explicit_unified()
real, allocatable, unified :: u(:)
@@ -147,7 +127,7 @@ end subroutine
! CHECK: hlfir.declare {{.*}} {data_attr = #cuf.cuda<unified>, fortran_attrs = #fir.var_attrs<allocatable>
! -----------------------------------------------------------------------------
-! Test 10: Dummy arguments - allocatable dummy without explicit attribute
+! Test 9: Dummy arguments - allocatable dummy without explicit attribute
! -----------------------------------------------------------------------------
subroutine test_dummy_allocatable(arr)
real, allocatable, intent(inout) :: arr(:)
@@ -160,7 +140,7 @@ end subroutine
! CHECK: hlfir.declare {{.*}} {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable, intent_inout>
! -----------------------------------------------------------------------------
-! Test 11: Module variables - allocatable module variable becomes managed
+! Test 10: Module variables - allocatable module variable becomes managed
! -----------------------------------------------------------------------------
module mod_globals
real, allocatable :: global_arr(:)
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index 607ac5c82cec4..aefb4369fc761 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -638,9 +638,9 @@ int main(int argc, char **argv) {
Fortran::common::LanguageFeature::CudaWarpMatchFunction, false);
}
- if (enableGPUMode == "managed") {
+ if (enableGPUMode == "managed" || enableGPUMode == "mem:managed") {
options.features.Enable(Fortran::common::LanguageFeature::CudaManaged);
- } else if (enableGPUMode == "unified") {
+ } else if (enableGPUMode == "unified" || enableGPUMode == "mem:unified") {
options.features.Enable(Fortran::common::LanguageFeature::CudaUnified);
}
>From 1d3074a6d2a14f367bd518efb55bd2d72d9f16c8 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Mon, 12 Jan 2026 13:28:54 -0800
Subject: [PATCH 4/5] remove space
---
flang/lib/Semantics/resolve-names.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index bf0dbcd619841..d50287ffcae31 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -3518,7 +3518,6 @@ void ScopeHandler::SetCUDADataAttr(SourceName source, Symbol &symbol,
symbol.name());
}
}
-
}
// ModuleVisitor implementation
>From 0a29846bc0479dca68b12fdd7052efb21c16b86b Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Mon, 12 Jan 2026 14:03:13 -0800
Subject: [PATCH 5/5] remove check for IsPointer
---
flang/lib/Semantics/resolve-names.cpp | 14 +++++---------
1 file changed, 5 insertions(+), 9 deletions(-)
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index d50287ffcae31..1256718fc76df 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -9910,17 +9910,13 @@ void ResolveNamesVisitor::FinishSpecificationPart(
SetBindNameOn(symbol);
}
}
- // -gpu=managed: implicitly treat allocatable arrays as managed
- // This is done here after all explicit CUDA attributes have been processed.
+ // -gpu=mem:managed: implicitly treat allocatable arrays as managed.
+ // This is done after all explicit CUDA attributes have been processed.
if (context().languageFeatures().IsEnabled(
- common::LanguageFeature::CudaManaged)) {
- if (auto *object{symbol.detailsIf<ObjectEntityDetails>()}) {
- if (IsAllocatable(symbol) && !IsPointer(symbol) &&
- !object->cudaDataAttr()) {
+ common::LanguageFeature::CudaManaged))
+ if (auto *object{symbol.detailsIf<ObjectEntityDetails>()})
+ if (IsAllocatable(symbol) && !object->cudaDataAttr())
object->set_cudaDataAttr(common::CUDADataAttr::Managed);
- }
- }
- }
}
currScope().InstantiateDerivedTypes();
for (const auto &decl : decls) {
More information about the flang-commits mailing list