[flang-commits] [flang] [flang][CUDA] Apply implicit managed attribute when `-gpu=mem:managed` is used. (PR #175648)

Zhen Wang via flang-commits flang-commits at lists.llvm.org
Mon Jan 12 17:49:10 PST 2026


https://github.com/wangzpgi updated https://github.com/llvm/llvm-project/pull/175648

>From e070ba023bea4c1160e3b583b3ff7d581b86900f Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Tue, 6 Jan 2026 13:38:51 -0800
Subject: [PATCH 1/5] add support for -gpu=managed

---
 flang/lib/Semantics/resolve-names.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index 527be8645ff81..51389501a87a2 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -3517,6 +3517,14 @@ void ScopeHandler::SetCUDADataAttr(SourceName source, Symbol &symbol,
           "'%s' is not an object and may not have a CUDA data attribute"_err_en_US,
           symbol.name());
     }
+  } else if (context().languageFeatures().IsEnabled(
+                 common::LanguageFeature::CudaManaged)) {
+    // -gpu=managed: implicitly treat allocatable arrays as managed
+    if (auto *object{symbol.detailsIf<ObjectEntityDetails>()}) {
+      if (IsAllocatable(symbol) && !object->cudaDataAttr()) {
+        object->set_cudaDataAttr(common::CUDADataAttr::Managed);
+      }
+    }
   }
 }
 

>From cf184d3b0f843f10f0659d9fdf92d3aaeaa5d07d Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Mon, 12 Jan 2026 09:15:12 -0800
Subject: [PATCH 2/5] add test

---
 flang/test/Lower/CUDA/cuda-gpu-managed.cuf | 186 +++++++++++++++++++++
 1 file changed, 186 insertions(+)
 create mode 100644 flang/test/Lower/CUDA/cuda-gpu-managed.cuf

diff --git a/flang/test/Lower/CUDA/cuda-gpu-managed.cuf b/flang/test/Lower/CUDA/cuda-gpu-managed.cuf
new file mode 100644
index 0000000000000..ca974c25f362c
--- /dev/null
+++ b/flang/test/Lower/CUDA/cuda-gpu-managed.cuf
@@ -0,0 +1,186 @@
+! RUN: bbc -emit-hlfir -fcuda -gpu=managed %s -o - | FileCheck %s
+
+! Test -gpu=managed flag: allocatable arrays without explicit CUDA attributes
+! should be implicitly treated as managed.
+
+! -----------------------------------------------------------------------------
+! Test 1: Basic allocatable without explicit attribute becomes managed
+! -----------------------------------------------------------------------------
+subroutine test_implicit_managed()
+  real, allocatable :: a(:)
+  allocate(a(100))
+  a = 1.0
+  deallocate(a)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_implicit_managed()
+! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<managed>, uniq_name = "_QFtest_implicit_managedEa"}
+! CHECK: fir.embox {{.*}} {allocator_idx = 3 : i32}
+! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_implicit_managedEa"}
+! CHECK: cuf.allocate %[[BOX_DECL]]#0 : {{.*}} {data_attr = #cuf.cuda<managed>}
+! CHECK: cuf.deallocate %[[BOX_DECL]]#0 : {{.*}} {data_attr = #cuf.cuda<managed>}
+! CHECK: cuf.free %[[BOX_DECL]]#0 : {{.*}} {data_attr = #cuf.cuda<managed>}
+
+! -----------------------------------------------------------------------------
+! Test 2: Explicit device attribute is preserved (not overridden to managed)
+! -----------------------------------------------------------------------------
+subroutine test_explicit_device()
+  real, allocatable, device :: d(:)
+  allocate(d(100))
+  deallocate(d)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_explicit_device()
+! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "d", data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_explicit_deviceEd"}
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
+! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_explicit_deviceEd"}
+! CHECK: cuf.allocate %[[BOX_DECL]]#0 : {{.*}} {data_attr = #cuf.cuda<device>}
+
+! -----------------------------------------------------------------------------
+! Test 3: Explicit pinned attribute is preserved
+! -----------------------------------------------------------------------------
+subroutine test_explicit_pinned()
+  real, allocatable, pinned :: p(:)
+  allocate(p(100))
+  deallocate(p)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_explicit_pinned()
+! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "p", data_attr = #cuf.cuda<pinned>, uniq_name = "_QFtest_explicit_pinnedEp"}
+! CHECK: fir.embox {{.*}} {allocator_idx = 1 : i32}
+! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_explicit_pinnedEp"}
+
+! -----------------------------------------------------------------------------
+! Test 4: Explicit managed attribute is preserved (redundant but valid)
+! -----------------------------------------------------------------------------
+subroutine test_explicit_managed()
+  real, allocatable, managed :: m(:)
+  allocate(m(100))
+  deallocate(m)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_explicit_managed()
+! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "m", data_attr = #cuf.cuda<managed>, uniq_name = "_QFtest_explicit_managedEm"}
+! CHECK: fir.embox {{.*}} {allocator_idx = 3 : i32}
+! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_explicit_managedEm"}
+
+! -----------------------------------------------------------------------------
+! Test 5: Pointer variables are NOT affected by -gpu=managed
+! -----------------------------------------------------------------------------
+subroutine test_pointer_not_managed()
+  real, pointer :: ptr(:)
+  allocate(ptr(100))
+  ptr = 1.0
+  deallocate(ptr)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_pointer_not_managed()
+! CHECK: %[[BOX:.*]] = fir.alloca !fir.box<!fir.ptr<!fir.array<?xf32>>> {bindc_name = "ptr", uniq_name = "_QFtest_pointer_not_managedEptr"}
+! CHECK-NOT: data_attr = #cuf.cuda<managed>
+! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_pointer_not_managedEptr"}
+! CHECK: fir.call @_FortranAPointerAllocate
+
+! -----------------------------------------------------------------------------
+! Test 6: Multiple allocatables - mix of implicit and explicit
+! -----------------------------------------------------------------------------
+subroutine test_mixed_allocatables()
+  real, allocatable :: a(:)           ! Should become managed
+  real, allocatable, device :: d(:)   ! Should stay device
+  real, allocatable, pinned :: p(:)   ! Should stay pinned
+  real, allocatable, managed :: m(:)  ! Should stay managed (explicit)
+  
+  allocate(a(10), d(10), p(10), m(10))
+  deallocate(a, d, p, m)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_mixed_allocatables()
+! CHECK: cuf.alloc {{.*}} {bindc_name = "a", data_attr = #cuf.cuda<managed>, uniq_name = "_QFtest_mixed_allocatablesEa"}
+! CHECK: cuf.alloc {{.*}} {bindc_name = "d", data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_mixed_allocatablesEd"}
+! CHECK: cuf.alloc {{.*}} {bindc_name = "m", data_attr = #cuf.cuda<managed>, uniq_name = "_QFtest_mixed_allocatablesEm"}
+! CHECK: cuf.alloc {{.*}} {bindc_name = "p", data_attr = #cuf.cuda<pinned>, uniq_name = "_QFtest_mixed_allocatablesEp"}
+
+! -----------------------------------------------------------------------------
+! Test 7: Allocatable in derived type - component allocation uses managed
+! -----------------------------------------------------------------------------
+module mod_derived
+  type :: container
+    real, allocatable :: data(:)
+  end type
+end module
+
+subroutine test_derived_type()
+  use mod_derived
+  type(container) :: c
+  allocate(c%data(100))
+  deallocate(c%data)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_derived_type()
+! CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<managed>}
+! CHECK: cuf.deallocate %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<managed>}
+
+! -----------------------------------------------------------------------------
+! Test 8: Multi-dimensional allocatable array
+! -----------------------------------------------------------------------------
+subroutine test_multidim()
+  real, allocatable :: arr(:,:,:)
+  allocate(arr(10,20,30))
+  arr = 0.0
+  deallocate(arr)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_multidim()
+! CHECK: cuf.alloc {{.*}} {bindc_name = "arr", data_attr = #cuf.cuda<managed>, uniq_name = "_QFtest_multidimEarr"}
+! CHECK: fir.embox {{.*}} {allocator_idx = 3 : i32}
+
+! -----------------------------------------------------------------------------
+! Test 9: Explicit unified attribute is preserved (not overridden to managed)
+! -----------------------------------------------------------------------------
+subroutine test_explicit_unified()
+  real, allocatable, unified :: u(:)
+  allocate(u(100))
+  deallocate(u)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_explicit_unified()
+! CHECK: cuf.alloc {{.*}} {bindc_name = "u", data_attr = #cuf.cuda<unified>, uniq_name = "_QFtest_explicit_unifiedEu"}
+! CHECK: hlfir.declare {{.*}} {data_attr = #cuf.cuda<unified>, fortran_attrs = #fir.var_attrs<allocatable>
+
+! -----------------------------------------------------------------------------
+! Test 10: Dummy arguments - allocatable dummy without explicit attribute
+! -----------------------------------------------------------------------------
+subroutine test_dummy_allocatable(arr)
+  real, allocatable, intent(inout) :: arr(:)
+  if (.not. allocated(arr)) allocate(arr(100))
+  arr = 1.0
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_dummy_allocatable(
+! CHECK-SAME: %{{.*}}: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuf.data_attr = #cuf.cuda<managed>, fir.bindc_name = "arr"})
+! CHECK: hlfir.declare {{.*}} {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable, intent_inout>
+
+! -----------------------------------------------------------------------------
+! Test 11: Module variables - allocatable module variable becomes managed
+! -----------------------------------------------------------------------------
+module mod_globals
+  real, allocatable :: global_arr(:)
+  real, allocatable, device :: global_device(:)
+end module
+
+! CHECK: fir.global @_QMmod_globalsEglobal_arr {data_attr = #cuf.cuda<managed>} : !fir.box<!fir.heap<!fir.array<?xf32>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 3 : i32}
+
+! CHECK: fir.global @_QMmod_globalsEglobal_device {data_attr = #cuf.cuda<device>} : !fir.box<!fir.heap<!fir.array<?xf32>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
+
+subroutine test_module_var()
+  use mod_globals
+  allocate(global_arr(50))
+  allocate(global_device(50))
+  deallocate(global_arr)
+  deallocate(global_device)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_module_var()
+! CHECK: cuf.allocate {{.*}} {data_attr = #cuf.cuda<managed>, hasDoubleDescriptor}
+! CHECK: cuf.allocate {{.*}} {data_attr = #cuf.cuda<device>, hasDoubleDescriptor}

>From 7f446facd4f43405c55a112c7d58d577fb8e1f22 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Mon, 12 Jan 2026 11:07:33 -0800
Subject: [PATCH 3/5] move the implicit managed logic to run AFTER all explicit
 CUDA attributes have been processed

---
 flang/lib/Semantics/resolve-names.cpp      | 20 ++++++++------
 flang/test/Lower/CUDA/cuda-gpu-managed.cuf | 32 ++++------------------
 flang/tools/bbc/bbc.cpp                    |  4 +--
 3 files changed, 20 insertions(+), 36 deletions(-)

diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index 51389501a87a2..bf0dbcd619841 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -3517,15 +3517,8 @@ void ScopeHandler::SetCUDADataAttr(SourceName source, Symbol &symbol,
           "'%s' is not an object and may not have a CUDA data attribute"_err_en_US,
           symbol.name());
     }
-  } else if (context().languageFeatures().IsEnabled(
-                 common::LanguageFeature::CudaManaged)) {
-    // -gpu=managed: implicitly treat allocatable arrays as managed
-    if (auto *object{symbol.detailsIf<ObjectEntityDetails>()}) {
-      if (IsAllocatable(symbol) && !object->cudaDataAttr()) {
-        object->set_cudaDataAttr(common::CUDADataAttr::Managed);
-      }
-    }
   }
+
 }
 
 // ModuleVisitor implementation
@@ -9918,6 +9911,17 @@ void ResolveNamesVisitor::FinishSpecificationPart(
         SetBindNameOn(symbol);
       }
     }
+    // -gpu=managed: implicitly treat allocatable arrays as managed
+    // This is done here after all explicit CUDA attributes have been processed.
+    if (context().languageFeatures().IsEnabled(
+            common::LanguageFeature::CudaManaged)) {
+      if (auto *object{symbol.detailsIf<ObjectEntityDetails>()}) {
+        if (IsAllocatable(symbol) && !IsPointer(symbol) &&
+            !object->cudaDataAttr()) {
+          object->set_cudaDataAttr(common::CUDADataAttr::Managed);
+        }
+      }
+    }
   }
   currScope().InstantiateDerivedTypes();
   for (const auto &decl : decls) {
diff --git a/flang/test/Lower/CUDA/cuda-gpu-managed.cuf b/flang/test/Lower/CUDA/cuda-gpu-managed.cuf
index ca974c25f362c..b015b470b921b 100644
--- a/flang/test/Lower/CUDA/cuda-gpu-managed.cuf
+++ b/flang/test/Lower/CUDA/cuda-gpu-managed.cuf
@@ -1,4 +1,4 @@
-! RUN: bbc -emit-hlfir -fcuda -gpu=managed %s -o - | FileCheck %s
+! RUN: bbc -emit-hlfir -fcuda -gpu=mem:managed %s -o - | FileCheck %s
 
 ! Test -gpu=managed flag: allocatable arrays without explicit CUDA attributes
 ! should be implicitly treated as managed.
@@ -88,7 +88,7 @@ subroutine test_mixed_allocatables()
   real, allocatable, device :: d(:)   ! Should stay device
   real, allocatable, pinned :: p(:)   ! Should stay pinned
   real, allocatable, managed :: m(:)  ! Should stay managed (explicit)
-  
+
   allocate(a(10), d(10), p(10), m(10))
   deallocate(a, d, p, m)
 end subroutine
@@ -100,27 +100,7 @@ end subroutine
 ! CHECK: cuf.alloc {{.*}} {bindc_name = "p", data_attr = #cuf.cuda<pinned>, uniq_name = "_QFtest_mixed_allocatablesEp"}
 
 ! -----------------------------------------------------------------------------
-! Test 7: Allocatable in derived type - component allocation uses managed
-! -----------------------------------------------------------------------------
-module mod_derived
-  type :: container
-    real, allocatable :: data(:)
-  end type
-end module
-
-subroutine test_derived_type()
-  use mod_derived
-  type(container) :: c
-  allocate(c%data(100))
-  deallocate(c%data)
-end subroutine
-
-! CHECK-LABEL: func.func @_QPtest_derived_type()
-! CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<managed>}
-! CHECK: cuf.deallocate %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<managed>}
-
-! -----------------------------------------------------------------------------
-! Test 8: Multi-dimensional allocatable array
+! Test 7: Multi-dimensional allocatable array
 ! -----------------------------------------------------------------------------
 subroutine test_multidim()
   real, allocatable :: arr(:,:,:)
@@ -134,7 +114,7 @@ end subroutine
 ! CHECK: fir.embox {{.*}} {allocator_idx = 3 : i32}
 
 ! -----------------------------------------------------------------------------
-! Test 9: Explicit unified attribute is preserved (not overridden to managed)
+! Test 8: Explicit unified attribute is preserved (not overridden to managed)
 ! -----------------------------------------------------------------------------
 subroutine test_explicit_unified()
   real, allocatable, unified :: u(:)
@@ -147,7 +127,7 @@ end subroutine
 ! CHECK: hlfir.declare {{.*}} {data_attr = #cuf.cuda<unified>, fortran_attrs = #fir.var_attrs<allocatable>
 
 ! -----------------------------------------------------------------------------
-! Test 10: Dummy arguments - allocatable dummy without explicit attribute
+! Test 9: Dummy arguments - allocatable dummy without explicit attribute
 ! -----------------------------------------------------------------------------
 subroutine test_dummy_allocatable(arr)
   real, allocatable, intent(inout) :: arr(:)
@@ -160,7 +140,7 @@ end subroutine
 ! CHECK: hlfir.declare {{.*}} {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable, intent_inout>
 
 ! -----------------------------------------------------------------------------
-! Test 11: Module variables - allocatable module variable becomes managed
+! Test 10: Module variables - allocatable module variable becomes managed
 ! -----------------------------------------------------------------------------
 module mod_globals
   real, allocatable :: global_arr(:)
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index 607ac5c82cec4..aefb4369fc761 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -638,9 +638,9 @@ int main(int argc, char **argv) {
         Fortran::common::LanguageFeature::CudaWarpMatchFunction, false);
   }
 
-  if (enableGPUMode == "managed") {
+  if (enableGPUMode == "managed" || enableGPUMode == "mem:managed") {
     options.features.Enable(Fortran::common::LanguageFeature::CudaManaged);
-  } else if (enableGPUMode == "unified") {
+  } else if (enableGPUMode == "unified" || enableGPUMode == "mem:unified") {
     options.features.Enable(Fortran::common::LanguageFeature::CudaUnified);
   }
 

>From 1d3074a6d2a14f367bd518efb55bd2d72d9f16c8 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Mon, 12 Jan 2026 13:28:54 -0800
Subject: [PATCH 4/5] remove stray blank line

---
 flang/lib/Semantics/resolve-names.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index bf0dbcd619841..d50287ffcae31 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -3518,7 +3518,6 @@ void ScopeHandler::SetCUDADataAttr(SourceName source, Symbol &symbol,
           symbol.name());
     }
   }
-
 }
 
 // ModuleVisitor implementation

>From 0a29846bc0479dca68b12fdd7052efb21c16b86b Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Mon, 12 Jan 2026 14:03:13 -0800
Subject: [PATCH 5/5] remove check for IsPointer

---
 flang/lib/Semantics/resolve-names.cpp | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index d50287ffcae31..1256718fc76df 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -9910,17 +9910,13 @@ void ResolveNamesVisitor::FinishSpecificationPart(
         SetBindNameOn(symbol);
       }
     }
-    // -gpu=managed: implicitly treat allocatable arrays as managed
-    // This is done here after all explicit CUDA attributes have been processed.
+    // -gpu=mem:managed: implicitly treat allocatable objects as managed.
+    // This is done after all explicit CUDA attributes have been processed.
     if (context().languageFeatures().IsEnabled(
-            common::LanguageFeature::CudaManaged)) {
-      if (auto *object{symbol.detailsIf<ObjectEntityDetails>()}) {
-        if (IsAllocatable(symbol) && !IsPointer(symbol) &&
-            !object->cudaDataAttr()) {
+            common::LanguageFeature::CudaManaged))
+      if (auto *object{symbol.detailsIf<ObjectEntityDetails>()})
+        if (IsAllocatable(symbol) && !object->cudaDataAttr())
           object->set_cudaDataAttr(common::CUDADataAttr::Managed);
-        }
-      }
-    }
   }
   currScope().InstantiateDerivedTypes();
   for (const auto &decl : decls) {



More information about the flang-commits mailing list