[flang-commits] [flang] dfc21ac - [flang][cuda] Convert global allocation for pinned variable (#106807)
via flang-commits
flang-commits at lists.llvm.org
Tue Sep 3 14:27:19 PDT 2024
Author: Valentin Clement (バレンタイン クレメン)
Date: 2024-09-03T14:27:16-07:00
New Revision: dfc21acdfa0eb7f6f6bb563445959fb18ea863da
URL: https://github.com/llvm/llvm-project/commit/dfc21acdfa0eb7f6f6bb563445959fb18ea863da
DIFF: https://github.com/llvm/llvm-project/commit/dfc21acdfa0eb7f6f6bb563445959fb18ea863da.diff
LOG: [flang][cuda] Convert global allocation for pinned variable (#106807)
ALLOCATE/DEALLOCATE statements for module allocatable variables with the
pinned attribute can be lowered to the standard runtime calls and do not
need further action, since these variables will have a unique descriptor
that is on the host.
Added:
Modified:
flang/lib/Optimizer/Transforms/CufOpConversion.cpp
flang/test/Fir/CUDA/cuda-allocate.fir
Removed:
################################################################################
diff --git a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
index d391ede82c2707..c22c74d3f78af7 100644
--- a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
@@ -33,17 +33,25 @@ using namespace Fortran::runtime::cuda;
namespace {
template <typename OpTy>
-static bool isBoxGlobal(OpTy op) {
+static bool needDoubleDescriptor(OpTy op) {
if (auto declareOp =
mlir::dyn_cast_or_null<fir::DeclareOp>(op.getBox().getDefiningOp())) {
if (mlir::isa_and_nonnull<fir::AddrOfOp>(
- declareOp.getMemref().getDefiningOp()))
+ declareOp.getMemref().getDefiningOp())) {
+ if (declareOp.getDataAttr() &&
+ *declareOp.getDataAttr() == cuf::DataAttribute::Pinned)
+ return false;
return true;
+ }
} else if (auto declareOp = mlir::dyn_cast_or_null<hlfir::DeclareOp>(
op.getBox().getDefiningOp())) {
if (mlir::isa_and_nonnull<fir::AddrOfOp>(
- declareOp.getMemref().getDefiningOp()))
+ declareOp.getMemref().getDefiningOp())) {
+ if (declareOp.getDataAttr() &&
+ *declareOp.getDataAttr() == cuf::DataAttribute::Pinned)
+ return false;
return true;
+ }
}
return false;
}
@@ -100,7 +108,7 @@ struct CufAllocateOpConversion
// TODO: Allocation of module variable will need more work as the descriptor
// will be duplicated and needs to be synced after allocation.
- if (isBoxGlobal(op))
+ if (needDoubleDescriptor(op))
return mlir::failure();
// Allocation for local descriptor falls back on the standard runtime
@@ -125,7 +133,7 @@ struct CufDeallocateOpConversion
mlir::PatternRewriter &rewriter) const override {
// TODO: Allocation of module variable will need more work as the descriptor
// will be duplicated and needs to be synced after allocation.
- if (isBoxGlobal(op))
+ if (needDoubleDescriptor(op))
return mlir::failure();
// Deallocation for local descriptor falls back on the standard runtime
@@ -274,9 +282,9 @@ class CufOpConversion : public fir::impl::CufOpConversionBase<CufOpConversion> {
return true;
});
target.addDynamicallyLegalOp<cuf::AllocateOp>(
- [](::cuf::AllocateOp op) { return isBoxGlobal(op); });
+ [](::cuf::AllocateOp op) { return needDoubleDescriptor(op); });
target.addDynamicallyLegalOp<cuf::DeallocateOp>(
- [](::cuf::DeallocateOp op) { return isBoxGlobal(op); });
+ [](::cuf::DeallocateOp op) { return needDoubleDescriptor(op); });
target.addLegalDialect<fir::FIROpsDialect>();
patterns.insert<CufAllocOpConversion>(ctx, &*dl, &typeConverter);
patterns.insert<CufAllocateOpConversion, CufDeallocateOpConversion,
diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir
index a9bc7a8518e90e..1c17e7447e5c97 100644
--- a/flang/test/Fir/CUDA/cuda-allocate.fir
+++ b/flang/test/Fir/CUDA/cuda-allocate.fir
@@ -68,6 +68,31 @@ func.func @_QPsub4() attributes {cuf.proc_attr = #cuf.cuda_proc<device>} {
// CHECK: fir.alloca
// CHECK-NOT: cuf.free
+fir.global @_QMglobalsEa_pinned {data_attr = #cuf.cuda<pinned>} : !fir.box<!fir.heap<!fir.array<?xf32>>> {
+ %0 = fir.zero_bits !fir.heap<!fir.array<?xf32>>
+ %c0 = arith.constant 0 : index
+ %1 = fir.shape %c0 : (index) -> !fir.shape<1>
+ %2 = fir.embox %0(%1) {allocator_idx = 1 : i32} : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xf32>>>
+ fir.has_value %2 : !fir.box<!fir.heap<!fir.array<?xf32>>>
}
+func.func @_QPsub5() {
+ %4 = fir.address_of(@_QMglobalsEa_pinned) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+ %5:2 = hlfir.declare %4 {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMglobalsEa_pinned"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+ %c1 = arith.constant 1 : index
+ %c10_i32 = arith.constant 10 : i32
+ %c0_i32 = arith.constant 0 : i32
+ %6 = fir.convert %5#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
+ %7 = fir.convert %c1 : (index) -> i64
+ %8 = fir.convert %c10_i32 : (i32) -> i64
+ %9 = fir.call @_FortranAAllocatableSetBounds(%6, %c0_i32, %7, %8) fastmath<contract> : (!fir.ref<!fir.box<none>>, i32, i64, i64) -> none
+ %10 = cuf.allocate %5#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<pinned>} -> i32
+ %11 = cuf.deallocate %5#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<pinned>} -> i32
+ return
+}
+
+// CHECK-LABEL: func.func @_QPsub5()
+// CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: fir.call @_FortranAAllocatableDeallocate({{.*}}) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+} // end of module
More information about the flang-commits
mailing list