[flang-commits] [flang] 3b1cc61 - [flang][cuda] Add missing pointer deallocation entry point (#192566)
via flang-commits
flang-commits at lists.llvm.org
Thu Apr 16 16:49:10 PDT 2026
Author: Valentin Clement (バレンタイン クレメン)
Date: 2026-04-16T16:49:04-07:00
New Revision: 3b1cc610162b118ba422b5dccde2c3718ed55614
URL: https://github.com/llvm/llvm-project/commit/3b1cc610162b118ba422b5dccde2c3718ed55614
DIFF: https://github.com/llvm/llvm-project/commit/3b1cc610162b118ba422b5dccde2c3718ed55614.diff
LOG: [flang][cuda] Add missing pointer deallocation entry point (#192566)
The deallocation entry point for pointers was missing, so every deallocation was
wired to the allocatable deallocate, which triggers an "Invalid descriptor" error for pointers.
Added:
Modified:
flang-rt/lib/cuda/pointer.cpp
flang/include/flang/Runtime/CUDA/pointer.h
flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp
flang/test/Fir/CUDA/cuda-allocate.fir
Removed:
################################################################################
diff --git a/flang-rt/lib/cuda/pointer.cpp b/flang-rt/lib/cuda/pointer.cpp
index bc990c5d27e21..9fdcd1bd9b4db 100644
--- a/flang-rt/lib/cuda/pointer.cpp
+++ b/flang-rt/lib/cuda/pointer.cpp
@@ -82,6 +82,24 @@ int RTDEF(CUFPointerAllocateSourceSync)(Descriptor &pointer,
return stat;
}
+int RTDEF(CUFPointerDeallocate)(Descriptor &desc, bool hasStat,
+ const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
+ // Perform the standard pointer deallocation.
+ int stat{
+ RTNAME(PointerDeallocate)(desc, hasStat, errMsg, sourceFile, sourceLine)};
+#ifndef RT_DEVICE_COMPILATION
+ // Descriptor synchronization is only done when the deallocation is done
+ // from the host.
+ if (stat == StatOk) {
+ void *deviceAddr{
+ RTNAME(CUFGetDeviceAddress)((void *)&desc, sourceFile, sourceLine)};
+ RTNAME(CUFDescriptorSync)
+ ((Descriptor *)deviceAddr, &desc, sourceFile, sourceLine);
+ }
+#endif
+ return stat;
+}
+
RT_EXT_API_GROUP_END
} // extern "C"
diff --git a/flang/include/flang/Runtime/CUDA/pointer.h b/flang/include/flang/Runtime/CUDA/pointer.h
index 4e49691d127e1..b6162aa4cf786 100644
--- a/flang/include/flang/Runtime/CUDA/pointer.h
+++ b/flang/include/flang/Runtime/CUDA/pointer.h
@@ -45,6 +45,12 @@ int RTDEF(CUFPointerAllocateSourceSync)(Descriptor &pointer,
const char *sourceFile = nullptr, int sourceLine = 0,
bool sourceIsDevice = false);
+/// Perform deallocation of the descriptor with synchronization of it when
+/// necessary.
+int RTDECL(CUFPointerDeallocate)(Descriptor &, bool hasStat = false,
+ const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
+ int sourceLine = 0);
+
} // extern "C"
} // namespace Fortran::runtime::cuda
diff --git a/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp b/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp
index 4e2bcb6e95aee..1b11ba99f8d4b 100644
--- a/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp
@@ -24,6 +24,7 @@
#include "flang/Runtime/CUDA/pointer.h"
#include "flang/Runtime/allocatable.h"
#include "flang/Runtime/allocator-registry-consts.h"
+#include "flang/Runtime/pointer.h"
#include "flang/Support/Fortran.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Matchers.h"
@@ -383,12 +384,17 @@ struct CUFDeallocateOpConversion
fir::FirOpBuilder builder(rewriter, mod);
mlir::Location loc = op.getLoc();
+ bool isPointer = op.getPointer();
+
if (op.getHasDoubleDescriptor()) {
// Deallocation for module variable are done with custom runtime entry
// point so the descriptors can be synchronized.
mlir::func::FuncOp func =
- fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableDeallocate)>(
- loc, builder);
+ isPointer
+ ? fir::runtime::getRuntimeFunc<mkRTKey(CUFPointerDeallocate)>(
+ loc, builder)
+ : fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableDeallocate)>(
+ loc, builder);
return convertOpToCall<cuf::DeallocateOp>(op, rewriter, func);
}
@@ -396,8 +402,11 @@ struct CUFDeallocateOpConversion
// AllocatableDeallocate as the dedicated deallocator is set in the
// descriptor before the call.
mlir::func::FuncOp func =
- fir::runtime::getRuntimeFunc<mkRTKey(AllocatableDeallocate)>(loc,
- builder);
+ isPointer
+ ? fir::runtime::getRuntimeFunc<mkRTKey(PointerDeallocate)>(loc,
+ builder)
+ : fir::runtime::getRuntimeFunc<mkRTKey(AllocatableDeallocate)>(
+ loc, builder);
return convertOpToCall<cuf::DeallocateOp>(op, rewriter, func);
}
};
diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir
index be26111c12c61..c70646e312a55 100644
--- a/flang/test/Fir/CUDA/cuda-allocate.fir
+++ b/flang/test/Fir/CUDA/cuda-allocate.fir
@@ -52,6 +52,27 @@ func.func @_QPsub3() {
// CHECK: %[[A_BOX:.*]] = fir.convert %[[A]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: fir.call @_FortranACUFAllocatableDeallocate(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+func.func @_QPsub31() {
+ %0 = fir.address_of(@_QMmod1Ea) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+ %1:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmod1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+ %3 = cuf.deallocate %1#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>, pointer} -> i32
+ return
+}
+
+// CHECK-LABEL: func.func @_QPsub31
+// CHECK: fir.call @_FortranAPointerDeallocate
+
+func.func @_QPsub32() {
+ %0 = fir.address_of(@_QMmod1Ea) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+ %1:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmod1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+ %3 = cuf.deallocate %1#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>, hasDoubleDescriptor, pointer} -> i32
+ return
+}
+
+// CHECK-LABEL: func.func @_QPsub32
+// CHECK: fir.call @_FortranACUFPointerDeallocate
+
+
func.func @_QPsub4() attributes {cuf.proc_attr = #cuf.cuda_proc<device>} {
%0 = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
More information about the flang-commits
mailing list