[flang-commits] [flang] 3b1cc61 - [flang][cuda] Add missing pointer deallocation entry point (#192566)

via flang-commits flang-commits at lists.llvm.org
Thu Apr 16 16:49:10 PDT 2026


Author: Valentin Clement (バレンタイン クレメン)
Date: 2026-04-16T16:49:04-07:00
New Revision: 3b1cc610162b118ba422b5dccde2c3718ed55614

URL: https://github.com/llvm/llvm-project/commit/3b1cc610162b118ba422b5dccde2c3718ed55614
DIFF: https://github.com/llvm/llvm-project/commit/3b1cc610162b118ba422b5dccde2c3718ed55614.diff

LOG: [flang][cuda] Add missing pointer deallocation entry point (#192566)

We were missing the deallocation entry point for pointers and were wiring all
deallocations to the allocatable deallocate entry point, which triggers an
"Invalid descriptor" error.

Added: 
    

Modified: 
    flang-rt/lib/cuda/pointer.cpp
    flang/include/flang/Runtime/CUDA/pointer.h
    flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp
    flang/test/Fir/CUDA/cuda-allocate.fir

Removed: 
    


################################################################################
diff  --git a/flang-rt/lib/cuda/pointer.cpp b/flang-rt/lib/cuda/pointer.cpp
index bc990c5d27e21..9fdcd1bd9b4db 100644
--- a/flang-rt/lib/cuda/pointer.cpp
+++ b/flang-rt/lib/cuda/pointer.cpp
@@ -82,6 +82,24 @@ int RTDEF(CUFPointerAllocateSourceSync)(Descriptor &pointer,
   return stat;
 }
 
+int RTDEF(CUFPointerDeallocate)(Descriptor &desc, bool hasStat,
+    const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
+  // Perform the standard pointer deallocation.
+  int stat{
+      RTNAME(PointerDeallocate)(desc, hasStat, errMsg, sourceFile, sourceLine)};
+#ifndef RT_DEVICE_COMPILATION
+  // Descriptor synchronization is only done when the deallocation is done
+  // from the host.
+  if (stat == StatOk) {
+    void *deviceAddr{
+        RTNAME(CUFGetDeviceAddress)((void *)&desc, sourceFile, sourceLine)};
+    RTNAME(CUFDescriptorSync)
+    ((Descriptor *)deviceAddr, &desc, sourceFile, sourceLine);
+  }
+#endif
+  return stat;
+}
+
 RT_EXT_API_GROUP_END
 
 } // extern "C"

diff  --git a/flang/include/flang/Runtime/CUDA/pointer.h b/flang/include/flang/Runtime/CUDA/pointer.h
index 4e49691d127e1..b6162aa4cf786 100644
--- a/flang/include/flang/Runtime/CUDA/pointer.h
+++ b/flang/include/flang/Runtime/CUDA/pointer.h
@@ -45,6 +45,12 @@ int RTDEF(CUFPointerAllocateSourceSync)(Descriptor &pointer,
     const char *sourceFile = nullptr, int sourceLine = 0,
     bool sourceIsDevice = false);
 
+/// Perform deallocation of the descriptor with synchronization of it when
+/// necessary.
+int RTDECL(CUFPointerDeallocate)(Descriptor &, bool hasStat = false,
+    const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
+    int sourceLine = 0);
+
 } // extern "C"
 
 } // namespace Fortran::runtime::cuda

diff  --git a/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp b/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp
index 4e2bcb6e95aee..1b11ba99f8d4b 100644
--- a/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp
@@ -24,6 +24,7 @@
 #include "flang/Runtime/CUDA/pointer.h"
 #include "flang/Runtime/allocatable.h"
 #include "flang/Runtime/allocator-registry-consts.h"
+#include "flang/Runtime/pointer.h"
 #include "flang/Support/Fortran.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/IR/Matchers.h"
@@ -383,12 +384,17 @@ struct CUFDeallocateOpConversion
     fir::FirOpBuilder builder(rewriter, mod);
     mlir::Location loc = op.getLoc();
 
+    bool isPointer = op.getPointer();
+
     if (op.getHasDoubleDescriptor()) {
       // Deallocation for module variable are done with custom runtime entry
       // point so the descriptors can be synchronized.
       mlir::func::FuncOp func =
-          fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableDeallocate)>(
-              loc, builder);
+          isPointer
+              ? fir::runtime::getRuntimeFunc<mkRTKey(CUFPointerDeallocate)>(
+                    loc, builder)
+              : fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableDeallocate)>(
+                    loc, builder);
       return convertOpToCall<cuf::DeallocateOp>(op, rewriter, func);
     }
 
@@ -396,8 +402,11 @@ struct CUFDeallocateOpConversion
     // AllocatableDeallocate as the dedicated deallocator is set in the
     // descriptor before the call.
     mlir::func::FuncOp func =
-        fir::runtime::getRuntimeFunc<mkRTKey(AllocatableDeallocate)>(loc,
-                                                                     builder);
+        isPointer
+            ? fir::runtime::getRuntimeFunc<mkRTKey(PointerDeallocate)>(loc,
+                                                                       builder)
+            : fir::runtime::getRuntimeFunc<mkRTKey(AllocatableDeallocate)>(
+                  loc, builder);
     return convertOpToCall<cuf::DeallocateOp>(op, rewriter, func);
   }
 };

diff  --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir
index be26111c12c61..c70646e312a55 100644
--- a/flang/test/Fir/CUDA/cuda-allocate.fir
+++ b/flang/test/Fir/CUDA/cuda-allocate.fir
@@ -52,6 +52,27 @@ func.func @_QPsub3() {
 // CHECK: %[[A_BOX:.*]] = fir.convert %[[A]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
 // CHECK: fir.call @_FortranACUFAllocatableDeallocate(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 
+func.func @_QPsub31() {
+  %0 = fir.address_of(@_QMmod1Ea) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+  %1:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmod1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+  %3 = cuf.deallocate %1#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>, pointer} -> i32
+  return
+}
+
+// CHECK-LABEL: func.func @_QPsub31
+// CHECK: fir.call @_FortranAPointerDeallocate
+
+func.func @_QPsub32() {
+  %0 = fir.address_of(@_QMmod1Ea) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+  %1:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmod1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+  %3 = cuf.deallocate %1#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>, hasDoubleDescriptor, pointer} -> i32
+  return
+}
+
+// CHECK-LABEL: func.func @_QPsub32
+// CHECK: fir.call @_FortranACUFPointerDeallocate
+
+
 func.func @_QPsub4() attributes {cuf.proc_attr = #cuf.cuda_proc<device>} {
   %0 = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
   %4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)


        


More information about the flang-commits mailing list