[flang-commits] [flang] 7ba7101 - [flang][cuda] Add pointer attribute to allocate/deallocate ops (#170937)

Fri Dec 5 14:43:31 PST 2025

Author: Valentin Clement (バレンタイン クレメン)
Date: 2025-12-05T22:43:26Z
New Revision: 7ba71012ee89229b86ab12cd31f0e69744131555

URL: https://github.com/llvm/llvm-project/commit/7ba71012ee89229b86ab12cd31f0e69744131555
DIFF: https://github.com/llvm/llvm-project/commit/7ba71012ee89229b86ab12cd31f0e69744131555.diff

LOG: [flang][cuda] Add pointer attribute to allocate/deallocate ops (#170937)

Similar to the double descriptor information added in
https://github.com/llvm/llvm-project/pull/170901, we need to carry over
the pointer information until the op can be converted. Detecting it from
the defining fir.declare op can fail if the op is converted late.

Added: 
    

Modified: 
    flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
    flang/lib/Lower/Allocatable.cpp
    flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp
    flang/test/Fir/CUDA/cuda-allocate.fir
    flang/test/Lower/CUDA/cuda-allocatable.cuf

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
index 766a0d6bb8ee0..636879f28a2fb 100644

--- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
@@ -101,7 +101,7 @@ def cuf_AllocateOp : cuf_Op<"allocate", [AttrSizedOperandSegments,
       Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$pinned,
       Arg<Optional<AnyRefOrBoxType>, "", [MemRead]>:$source,
       cuf_DataAttributeAttr:$data_attr, UnitAttr:$hasStat,
-      UnitAttr:$hasDoubleDescriptor);
+      UnitAttr:$hasDoubleDescriptor, UnitAttr:$pointer);
 
   let results = (outs AnyIntegerType:$stat);
 
@@ -129,7 +129,7 @@ def cuf_DeallocateOp : cuf_Op<"deallocate",
   let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box,
       Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$errmsg,
       cuf_DataAttributeAttr:$data_attr, UnitAttr:$hasStat,
-      UnitAttr:$hasDoubleDescriptor);
+      UnitAttr:$hasDoubleDescriptor, UnitAttr:$pointer);
 
   let results = (outs AnyIntegerType:$stat);
 

diff  --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 2ae13e2bd73fb..c9a9d935bd615 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -803,7 +803,8 @@ class AllocateStmtHelper {
                builder, loc, retTy, box.getAddr(), errmsg, stream, pinned,
                source, cudaAttr,
                errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr,
-               doubleDescriptors ? builder.getUnitAttr() : nullptr)
+               doubleDescriptors ? builder.getUnitAttr() : nullptr,
+               box.isPointer() ? builder.getUnitAttr() : nullptr)
         .getResult();
   }
 
@@ -873,7 +874,8 @@ static mlir::Value genCudaDeallocate(fir::FirOpBuilder &builder,
   return cuf::DeallocateOp::create(
              builder, loc, retTy, box.getAddr(), errmsg, cudaAttr,
              errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr,
-             doubleDescriptors ? builder.getUnitAttr() : nullptr)
+             doubleDescriptors ? builder.getUnitAttr() : nullptr,
+             box.isPointer() ? builder.getUnitAttr() : nullptr)
       .getResult();
 }
 

diff  --git a/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp b/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp
index 2c40991580c2e..6579c2362cd87 100644
--- a/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp
@@ -322,15 +322,7 @@ struct CUFAllocateOpConversion
     fir::FirOpBuilder builder(rewriter, mod);
     mlir::Location loc = op.getLoc();
 
-    bool isPointer = false;
-
-    if (auto declareOp =
-            mlir::dyn_cast_or_null<fir::DeclareOp>(op.getBox().getDefiningOp()))
-      if (declareOp.getFortranAttrs() &&
-          bitEnumContainsAny(*declareOp.getFortranAttrs(),
-                             fir::FortranVariableFlagsEnum::pointer))
-        isPointer = true;
-
+    bool isPointer = op.getPointer();
     if (op.getHasDoubleDescriptor()) {
       // Allocation for module variable are done with custom runtime entry point
       // so the descriptors can be synchronized.

diff  --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir
index eb2816145c77a..9d0d181609ada 100644
--- a/flang/test/Fir/CUDA/cuda-allocate.fir
+++ b/flang/test/Fir/CUDA/cuda-allocate.fir
@@ -183,7 +183,7 @@ func.func @_QQallocate_stream() {
 func.func @_QPp_alloc() {
   %0 = cuf.alloc !fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>> {bindc_name = "complex_array", data_attr = #cuf.cuda<device>, uniq_name = "_QFp_allocEcomplex_array"} -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>>
   %4 = fir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFp_allocEcomplex_array"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>>
-  %9 = cuf.allocate %4 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>> {data_attr = #cuf.cuda<device>} -> i32
+  %9 = cuf.allocate %4 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>> {data_attr = #cuf.cuda<device>, pointer} -> i32
   return
 }
 
@@ -201,7 +201,7 @@ func.func @_QPpointer_source() {
   %5 = cuf.alloc !fir.box<!fir.ptr<!fir.array<?x?xf32>>> {bindc_name = "a_d", data_attr = #cuf.cuda<device>, uniq_name = "_QFpointer_sourceEa_d"} -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
   %7 = fir.declare %5 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFpointer_sourceEa_d"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
   %8 = fir.load %4 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
-  %22 = cuf.allocate %7 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> source(%8 : !fir.box<!fir.ptr<!fir.array<?x?xf32>>>) {data_attr = #cuf.cuda<device>} -> i32
+  %22 = cuf.allocate %7 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> source(%8 : !fir.box<!fir.ptr<!fir.array<?x?xf32>>>) {data_attr = #cuf.cuda<device>, pointer} -> i32
   return
 }
 
@@ -226,7 +226,7 @@ func.func @_QQpointer_sync() attributes {fir.bindc_name = "test"} {
   %3 = fir.convert %c1 : (index) -> i64
   %4 = fir.convert %c10_i32 : (i32) -> i64
   fir.call @_FortranAAllocatableSetBounds(%2, %c0_i32, %3, %4) fastmath<contract> : (!fir.ref<!fir.box<none>>, i32, i64, i64) -> ()
-  %6 = cuf.allocate %1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>, hasDoubleDescriptor} -> i32
+  %6 = cuf.allocate %1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>, hasDoubleDescriptor, pointer} -> i32
   return
 }
 
@@ -246,7 +246,7 @@ func.func @_QMmod1Ppointer_source_global() {
   %2 = fir.alloca !fir.box<!fir.ptr<!fir.array<?x?xf32>>> {bindc_name = "a", uniq_name = "_QMmod1Fallocate_source_globalEa"}
   %6 = fir.declare %2 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmod1Fallocate_source_globalEa"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
   %7 = fir.load %6 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
-  %21 = cuf.allocate %1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> source(%7 : !fir.box<!fir.ptr<!fir.array<?x?xf32>>>) {data_attr = #cuf.cuda<device>, hasDoubleDescriptor} -> i32
+  %21 = cuf.allocate %1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> source(%7 : !fir.box<!fir.ptr<!fir.array<?x?xf32>>>) {data_attr = #cuf.cuda<device>, hasDoubleDescriptor, pointer} -> i32
   return
 }
 

diff  --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 393faff6046bc..43e716532ecca 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -227,6 +227,14 @@ end
 ! CHECK: %[[FLASE_CONV:.*]] = fir.convert %[[FALSE]] : (i1) -> !fir.logical<4>
 ! CHECK: fir.store %[[FLASE_CONV]] to %[[PLOG_DECL]]#0 : !fir.ref<!fir.logical<4>>
 
+subroutine devicepointer()
+  integer, device, pointer :: i(:)
+  allocate(i(10))
+end
+
+! CHECK-LABEL: func.func @_QPdevicepointer()
+! CHECK: cuf.allocate{{.*}}pointer
+
 subroutine cuda_component()
   use globals
   type(t1), pointer, dimension(:) :: d