[flang-commits] [flang] f57f338 - [flang][cuda] Add double descriptor information in allocate/deallocate operations (#170901)
via flang-commits
flang-commits at lists.llvm.org
Fri Dec 5 11:08:49 PST 2025
Author: Valentin Clement (バレンタイン クレメン)
Date: 2025-12-05T11:08:45-08:00
New Revision: f57f338313c19ff94959203ea98e96e4ec895c51
URL: https://github.com/llvm/llvm-project/commit/f57f338313c19ff94959203ea98e96e4ec895c51
DIFF: https://github.com/llvm/llvm-project/commit/f57f338313c19ff94959203ea98e96e4ec895c51.diff
LOG: [flang][cuda] Add double descriptor information in allocate/deallocate operations (#170901)
After https://github.com/llvm/llvm-project/pull/169740, the cuf allocate and
deallocate operations can be converted later. Update the way the double
descriptor case is recognized by adding this information directly on the
operations themselves.
Added:
Modified:
flang/include/flang/Lower/CUDA.h
flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
flang/lib/Lower/Allocatable.cpp
flang/lib/Lower/CUDA.cpp
flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp
flang/test/Fir/CUDA/cuda-allocate.fir
flang/test/Lower/CUDA/cuda-allocatable.cuf
Removed:
################################################################################
diff --git a/flang/include/flang/Lower/CUDA.h b/flang/include/flang/Lower/CUDA.h
index ef7cdc42d72f2..704b0356c19ed 100644
--- a/flang/include/flang/Lower/CUDA.h
+++ b/flang/include/flang/Lower/CUDA.h
@@ -66,6 +66,9 @@ translateSymbolCUFDataAttribute(mlir::MLIRContext *mlirContext,
/// there is a conversion. Return null otherwise.
hlfir::ElementalOp isTransferWithConversion(mlir::Value rhs);
+/// Check if the value is an allocatable with double descriptor.
+bool hasDoubleDescriptor(mlir::Value);
+
} // end namespace Fortran::lower
#endif // FORTRAN_LOWER_CUDA_H
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
index 920bef99dc996..766a0d6bb8ee0 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
@@ -100,7 +100,8 @@ def cuf_AllocateOp : cuf_Op<"allocate", [AttrSizedOperandSegments,
Optional<fir_ReferenceType>:$stream,
Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$pinned,
Arg<Optional<AnyRefOrBoxType>, "", [MemRead]>:$source,
- cuf_DataAttributeAttr:$data_attr, UnitAttr:$hasStat);
+ cuf_DataAttributeAttr:$data_attr, UnitAttr:$hasStat,
+ UnitAttr:$hasDoubleDescriptor);
let results = (outs AnyIntegerType:$stat);
@@ -126,9 +127,9 @@ def cuf_DeallocateOp : cuf_Op<"deallocate",
}];
let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box,
- Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$errmsg,
- cuf_DataAttributeAttr:$data_attr,
- UnitAttr:$hasStat);
+ Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$errmsg,
+ cuf_DataAttributeAttr:$data_attr, UnitAttr:$hasStat,
+ UnitAttr:$hasDoubleDescriptor);
let results = (outs AnyIntegerType:$stat);
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index e7a6c4df40045..2ae13e2bd73fb 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -798,10 +798,12 @@ class AllocateStmtHelper {
// Keep return type the same as a standard AllocatableAllocate call.
mlir::Type retTy = fir::runtime::getModel<int>()(builder.getContext());
+ bool doubleDescriptors = Fortran::lower::hasDoubleDescriptor(box.getAddr());
return cuf::AllocateOp::create(
builder, loc, retTy, box.getAddr(), errmsg, stream, pinned,
source, cudaAttr,
- errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr)
+ errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr,
+ doubleDescriptors ? builder.getUnitAttr() : nullptr)
.getResult();
}
@@ -865,11 +867,13 @@ static mlir::Value genCudaDeallocate(fir::FirOpBuilder &builder,
? nullptr
: errorManager.errMsgAddr;
- // Keep return type the same as a standard AllocatableAllocate call.
+ // Keep return type the same as a standard AllocatableDeallocate call.
mlir::Type retTy = fir::runtime::getModel<int>()(builder.getContext());
+ bool doubleDescriptors = Fortran::lower::hasDoubleDescriptor(box.getAddr());
return cuf::DeallocateOp::create(
builder, loc, retTy, box.getAddr(), errmsg, cudaAttr,
- errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr)
+ errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr,
+ doubleDescriptors ? builder.getUnitAttr() : nullptr)
.getResult();
}
diff --git a/flang/lib/Lower/CUDA.cpp b/flang/lib/Lower/CUDA.cpp
index 9501b0ec60002..fb055286df46b 100644
--- a/flang/lib/Lower/CUDA.cpp
+++ b/flang/lib/Lower/CUDA.cpp
@@ -91,3 +91,17 @@ hlfir::ElementalOp Fortran::lower::isTransferWithConversion(mlir::Value rhs) {
return elOp;
return {};
}
+
+bool Fortran::lower::hasDoubleDescriptor(mlir::Value addr) {
+ if (auto declareOp =
+ mlir::dyn_cast_or_null<hlfir::DeclareOp>(addr.getDefiningOp())) {
+ if (mlir::isa_and_nonnull<fir::AddrOfOp>(
+ declareOp.getMemref().getDefiningOp())) {
+ if (declareOp.getDataAttr() &&
+ *declareOp.getDataAttr() == cuf::DataAttribute::Pinned)
+ return false;
+ return true;
+ }
+ }
+ return false;
+}
diff --git a/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp b/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp
index 0acdb24bf62b1..2c40991580c2e 100644
--- a/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp
@@ -62,28 +62,6 @@ static inline unsigned getMemType(cuf::DataAttribute attr) {
llvm_unreachable("unsupported memory type");
}
-template <typename OpTy>
-static bool hasDoubleDescriptors(OpTy op) {
- if (auto declareOp =
- mlir::dyn_cast_or_null<fir::DeclareOp>(op.getBox().getDefiningOp())) {
- if (mlir::isa_and_nonnull<fir::AddrOfOp>(
- declareOp.getMemref().getDefiningOp())) {
- if (isPinned(declareOp))
- return false;
- return true;
- }
- } else if (auto declareOp = mlir::dyn_cast_or_null<hlfir::DeclareOp>(
- op.getBox().getDefiningOp())) {
- if (mlir::isa_and_nonnull<fir::AddrOfOp>(
- declareOp.getMemref().getDefiningOp())) {
- if (isPinned(declareOp))
- return false;
- return true;
- }
- }
- return false;
-}
-
static bool inDeviceContext(mlir::Operation *op) {
if (op->getParentOfType<cuf::KernelOp>())
return true;
@@ -353,7 +331,7 @@ struct CUFAllocateOpConversion
fir::FortranVariableFlagsEnum::pointer))
isPointer = true;
- if (hasDoubleDescriptors(op)) {
+ if (op.getHasDoubleDescriptor()) {
// Allocation for module variable are done with custom runtime entry point
// so the descriptors can be synchronized.
mlir::func::FuncOp func;
@@ -406,7 +384,7 @@ struct CUFDeallocateOpConversion
fir::FirOpBuilder builder(rewriter, mod);
mlir::Location loc = op.getLoc();
- if (hasDoubleDescriptors(op)) {
+ if (op.getHasDoubleDescriptor()) {
// Deallocation for module variable are done with custom runtime entry
// point so the descriptors can be synchronized.
mlir::func::FuncOp func =
diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir
index ea7890c9aac52..eb2816145c77a 100644
--- a/flang/test/Fir/CUDA/cuda-allocate.fir
+++ b/flang/test/Fir/CUDA/cuda-allocate.fir
@@ -37,8 +37,8 @@ fir.global @_QMmod1Ea {data_attr = #cuf.cuda<device>} : !fir.box<!fir.heap<!fir.
func.func @_QPsub3() {
%0 = fir.address_of(@_QMmod1Ea) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
%1:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmod1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
- %2 = cuf.allocate %1#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
- %3 = cuf.deallocate %1#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
+ %2 = cuf.allocate %1#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>, hasDoubleDescriptor} -> i32
+ %3 = cuf.deallocate %1#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>, hasDoubleDescriptor} -> i32
return
}
@@ -109,7 +109,7 @@ func.func @_QQsub6() attributes {fir.bindc_name = "test"} {
%3 = fir.convert %c1 : (index) -> i64
%4 = fir.convert %c10_i32 : (i32) -> i64
fir.call @_FortranAAllocatableSetBounds(%2, %c0_i32, %3, %4) fastmath<contract> : (!fir.ref<!fir.box<none>>, i32, i64, i64) -> ()
- %6 = cuf.allocate %1#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>} -> i32
+ %6 = cuf.allocate %1#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>, hasDoubleDescriptor} -> i32
return
}
@@ -158,7 +158,7 @@ func.func @_QMmod1Pallocate_source_global() {
%2 = fir.alloca !fir.box<!fir.heap<!fir.array<?x?xf32>>> {bindc_name = "a", uniq_name = "_QMmod1Fallocate_source_globalEa"}
%6 = fir.declare %2 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmod1Fallocate_source_globalEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
%7 = fir.load %6 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
- %21 = cuf.allocate %1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>> source(%7 : !fir.box<!fir.heap<!fir.array<?x?xf32>>>) {data_attr = #cuf.cuda<device>} -> i32
+ %21 = cuf.allocate %1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>> source(%7 : !fir.box<!fir.heap<!fir.array<?x?xf32>>>) {data_attr = #cuf.cuda<device>, hasDoubleDescriptor} -> i32
return
}
@@ -226,7 +226,7 @@ func.func @_QQpointer_sync() attributes {fir.bindc_name = "test"} {
%3 = fir.convert %c1 : (index) -> i64
%4 = fir.convert %c10_i32 : (i32) -> i64
fir.call @_FortranAAllocatableSetBounds(%2, %c0_i32, %3, %4) fastmath<contract> : (!fir.ref<!fir.box<none>>, i32, i64, i64) -> ()
- %6 = cuf.allocate %1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>} -> i32
+ %6 = cuf.allocate %1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>, hasDoubleDescriptor} -> i32
return
}
@@ -246,7 +246,7 @@ func.func @_QMmod1Ppointer_source_global() {
%2 = fir.alloca !fir.box<!fir.ptr<!fir.array<?x?xf32>>> {bindc_name = "a", uniq_name = "_QMmod1Fallocate_source_globalEa"}
%6 = fir.declare %2 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmod1Fallocate_source_globalEa"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
%7 = fir.load %6 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
- %21 = cuf.allocate %1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> source(%7 : !fir.box<!fir.ptr<!fir.array<?x?xf32>>>) {data_attr = #cuf.cuda<device>} -> i32
+ %21 = cuf.allocate %1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> source(%7 : !fir.box<!fir.ptr<!fir.array<?x?xf32>>>) {data_attr = #cuf.cuda<device>, hasDoubleDescriptor} -> i32
return
}
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 2cf8c7d336812..393faff6046bc 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -235,3 +235,21 @@ end subroutine
! CHECK-LABEL: func.func @_QPcuda_component()
! CHECK: cuf.allocate
+
+subroutine module_allocate()
+ use globals
+ allocate(a_device(10))
+ allocate(a_managed(10))
+ allocate(a_pinned(10))
+ deallocate(a_device)
+ deallocate(a_managed)
+ deallocate(a_pinned)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPmodule_allocate()
+! CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>, hasDoubleDescriptor} -> i32
+! CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<managed>, hasDoubleDescriptor} -> i32
+! CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<pinned>} -> i32
+! CHECK: cuf.deallocate %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>, hasDoubleDescriptor} -> i32
+! CHECK: cuf.deallocate %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<managed>, hasDoubleDescriptor} -> i32
+! CHECK: cuf.deallocate %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<pinned>} -> i32
More information about the flang-commits mailing list