[flang-commits] [flang] [flang][CUDA] Only apply implicit managed attribute when CUDA Fortran is enabled (PR #195353)

Fri May 1 13:50:13 PDT 2026

https://github.com/wangzpgi created https://github.com/llvm/llvm-project/pull/195353

The implicit-managed tagging added in #175648 was intended for CUDA Fortran allocatables. However, the gate was just LanguageFeature::CudaManaged, so the tagging also fires on non-CUDA-Fortran translation units when -gpu=mem:managed is in effect.

This patch adds a LanguageFeature::CUDA check so the implicit tagging only fires for CUDA Fortran TUs (driver-set -fcuda or .cuf/.CUF source). It also adds a regression test verifying that bbc -gpu=managed without -fcuda on a .f90 source does not produce any cuf.* ops or #cuf.cuda<managed> attributes.

>From bb78e9a52eaa40fef92cdfaca859be80068b18f5 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Fri, 1 May 2026 12:59:04 -0700
Subject: [PATCH 1/2] Only set managed for CUF when -gpu=mem:managed

---
 flang/lib/Semantics/resolve-names.cpp         |  7 ++-
 .../CUDA/cuda-gpu-managed-without-fcuda.f90   | 61 +++++++++++++++++++
 2 files changed, 67 insertions(+), 1 deletion(-)
 create mode 100644 flang/test/Lower/CUDA/cuda-gpu-managed-without-fcuda.f90

diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index 86eefc57f9749..562126766e6eb 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -10157,8 +10157,13 @@ void ResolveNamesVisitor::FinishSpecificationPart(
     }
     // Implicitly treat allocatable arrays as managed when feature is enabled.
     // This is done after all explicit CUDA attributes have been processed.
+    // Only applies when CUDA Fortran is enabled; otherwise -gpu=mem:managed
+    // on a non-CUDA-Fortran translation unit (e.g. pure OpenACC) would
+    // incorrectly route every allocatable through the CUDA Fortran managed
+    // descriptor pipeline.
     if (context().languageFeatures().IsEnabled(
-            common::LanguageFeature::CudaManaged))
+            common::LanguageFeature::CudaManaged) &&
+        context().languageFeatures().IsEnabled(common::LanguageFeature::CUDA))
       if (auto *object{symbol.detailsIf<ObjectEntityDetails>()})
         if (IsAllocatable(symbol) && !object->cudaDataAttr())
           object->set_cudaDataAttr(common::CUDADataAttr::Managed);
diff --git a/flang/test/Lower/CUDA/cuda-gpu-managed-without-fcuda.f90 b/flang/test/Lower/CUDA/cuda-gpu-managed-without-fcuda.f90
new file mode 100644
index 0000000000000..8c0dc1846160e
--- /dev/null
+++ b/flang/test/Lower/CUDA/cuda-gpu-managed-without-fcuda.f90
@@ -0,0 +1,61 @@
+! RUN: bbc -emit-hlfir -gpu=managed %s -o - | FileCheck %s
+! RUN: bbc -emit-hlfir -gpu=mem:managed %s -o - | FileCheck %s
+
+! Test that -gpu=managed (a.k.a. -gpu=mem:managed) does NOT implicitly tag
+! plain Fortran allocatables as CUDA managed when CUDA Fortran is not
+! enabled. The implicit-managed tagging is a CUDA Fortran convenience and
+! should only fire when -fcuda is also in effect (or the source is .cuf).
+! Otherwise, a non-CUDA-Fortran translation unit (e.g. pure OpenACC code
+! compiled with -gpu=mem:managed by the driver) would route every
+! allocatable through the CUDA Fortran managed descriptor pipeline and
+! crash at runtime in cudaGetSymbolAddress.
+
+! -----------------------------------------------------------------------------
+! Test 1: Plain allocatable stays plain - no cuf.* ops, no managed tagging
+! -----------------------------------------------------------------------------
+subroutine test_no_implicit_managed()
+  real, allocatable :: a(:)
+  allocate(a(100))
+  a = 1.0
+  deallocate(a)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_no_implicit_managed()
+! CHECK-NOT:   cuf.alloc
+! CHECK-NOT:   data_attr = #cuf.cuda<managed>
+! CHECK-NOT:   allocator_idx = 3
+! CHECK-NOT:   cuf.allocate
+! CHECK-NOT:   cuf.deallocate
+! CHECK:       fir.call @_FortranAAllocatableAllocate
+! CHECK:       fir.call @_FortranAAllocatableDeallocate
+
+! -----------------------------------------------------------------------------
+! Test 2: Multi-dimensional allocatable also stays plain
+! -----------------------------------------------------------------------------
+subroutine test_no_implicit_managed_multidim()
+  real, allocatable :: arr(:,:,:)
+  allocate(arr(10,20,30))
+  arr = 0.0
+  deallocate(arr)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_no_implicit_managed_multidim()
+! CHECK-NOT:   data_attr = #cuf.cuda<managed>
+! CHECK-NOT:   allocator_idx = 3
+
+! -----------------------------------------------------------------------------
+! Test 3: Module-level allocatable global also stays plain
+! -----------------------------------------------------------------------------
+module mod_no_managed
+  real, allocatable :: g(:)
+end module
+
+subroutine test_no_implicit_managed_module()
+  use mod_no_managed
+  allocate(g(50))
+  deallocate(g)
+end subroutine
+
+! CHECK:     fir.global @_QMmod_no_managedEg : !fir.box<!fir.heap<!fir.array<?xf32>>>
+! CHECK-NOT: data_attr = #cuf.cuda<managed>
+! CHECK-NOT: allocator_idx = 3

>From 80c1b227192d9fa9089706d54bf69a856646c3c1 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Fri, 1 May 2026 13:47:09 -0700
Subject: [PATCH 2/2] update test

---
 .../CUDA/cuda-gpu-managed-without-fcuda.f90   | 44 +++++++++----------
 1 file changed, 21 insertions(+), 23 deletions(-)

diff --git a/flang/test/Lower/CUDA/cuda-gpu-managed-without-fcuda.f90 b/flang/test/Lower/CUDA/cuda-gpu-managed-without-fcuda.f90
index 8c0dc1846160e..eb767bdfcad3d 100644
--- a/flang/test/Lower/CUDA/cuda-gpu-managed-without-fcuda.f90
+++ b/flang/test/Lower/CUDA/cuda-gpu-managed-without-fcuda.f90
@@ -10,9 +10,6 @@
 ! allocatable through the CUDA Fortran managed descriptor pipeline and
 ! crash at runtime in cudaGetSymbolAddress.
 
-! -----------------------------------------------------------------------------
-! Test 1: Plain allocatable stays plain - no cuf.* ops, no managed tagging
-! -----------------------------------------------------------------------------
 subroutine test_no_implicit_managed()
   real, allocatable :: a(:)
   allocate(a(100))
@@ -20,18 +17,6 @@ subroutine test_no_implicit_managed()
   deallocate(a)
 end subroutine
 
-! CHECK-LABEL: func.func @_QPtest_no_implicit_managed()
-! CHECK-NOT:   cuf.alloc
-! CHECK-NOT:   data_attr = #cuf.cuda<managed>
-! CHECK-NOT:   allocator_idx = 3
-! CHECK-NOT:   cuf.allocate
-! CHECK-NOT:   cuf.deallocate
-! CHECK:       fir.call @_FortranAAllocatableAllocate
-! CHECK:       fir.call @_FortranAAllocatableDeallocate
-
-! -----------------------------------------------------------------------------
-! Test 2: Multi-dimensional allocatable also stays plain
-! -----------------------------------------------------------------------------
 subroutine test_no_implicit_managed_multidim()
   real, allocatable :: arr(:,:,:)
   allocate(arr(10,20,30))
@@ -39,13 +24,6 @@ subroutine test_no_implicit_managed_multidim()
   deallocate(arr)
 end subroutine
 
-! CHECK-LABEL: func.func @_QPtest_no_implicit_managed_multidim()
-! CHECK-NOT:   data_attr = #cuf.cuda<managed>
-! CHECK-NOT:   allocator_idx = 3
-
-! -----------------------------------------------------------------------------
-! Test 3: Module-level allocatable global also stays plain
-! -----------------------------------------------------------------------------
 module mod_no_managed
   real, allocatable :: g(:)
 end module
@@ -56,6 +34,26 @@ subroutine test_no_implicit_managed_module()
   deallocate(g)
 end subroutine
 
-! CHECK:     fir.global @_QMmod_no_managedEg : !fir.box<!fir.heap<!fir.array<?xf32>>>
+! CHECK-LABEL: func.func @_QPtest_no_implicit_managed()
+! CHECK-NOT:     cuf.alloc
+! CHECK-NOT:     data_attr = #cuf.cuda<managed>
+! CHECK-NOT:     allocator_idx = 3
+! CHECK-NOT:     cuf.allocate
+! CHECK-NOT:     cuf.deallocate
+! CHECK-NOT:     cuf.free
+
+! CHECK-LABEL: func.func @_QPtest_no_implicit_managed_multidim()
+! CHECK-NOT:     cuf.alloc
+! CHECK-NOT:     data_attr = #cuf.cuda<managed>
+! CHECK-NOT:     allocator_idx = 3
+
+! CHECK-LABEL: func.func @_QPtest_no_implicit_managed_module()
+! CHECK-NOT:     cuf.allocate
+! CHECK-NOT:     cuf.deallocate
+! CHECK-NOT:     data_attr = #cuf.cuda<managed>
+! CHECK-NOT:     allocator_idx = 3
+
+! Module global must not be tagged as managed either.
+! CHECK:     fir.global @_QMmod_no_managedEg
 ! CHECK-NOT: data_attr = #cuf.cuda<managed>
 ! CHECK-NOT: allocator_idx = 3