[flang-commits] [flang] [flang][cuda] Make operations dynamically legal in cuf op conversion (PR #102220)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Tue Aug 6 14:06:04 PDT 2024
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/102220
>From a9b44b01879a76d9f29410fc4c085021b82e5d96 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 6 Aug 2024 13:46:55 -0700
Subject: [PATCH] [flang][cuda] Make operations dynamically legal in cuf op
conversion
Some occurrences of cuf.alloc, cuf.free, cuf.allocate or cuf.deallocate are
not converted during this pass. Mark these operations dynamically legal.
---
.../Optimizer/Transforms/CufOpConversion.cpp | 17 ++++++++--
flang/test/Fir/CUDA/cuda-allocate.fir | 34 ++++++++++++++++++-
2 files changed, 47 insertions(+), 4 deletions(-)
diff --git a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
index 70b5037994216..f059d36315a34 100644
--- a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
@@ -234,9 +234,20 @@ class CufOpConversion : public fir::impl::CufOpConversionBase<CufOpConversion> {
fir::support::getOrSetDataLayout(module, /*allowDefaultLayout=*/false);
fir::LLVMTypeConverter typeConverter(module, /*applyTBAA=*/false,
/*forceUnifiedTBAATree=*/false, *dl);
-
- target.addIllegalOp<cuf::AllocOp, cuf::AllocateOp, cuf::DeallocateOp,
- cuf::FreeOp>();
+ target.addDynamicallyLegalOp<cuf::AllocOp>([](::cuf::AllocOp op) {
+ return !mlir::isa<fir::BaseBoxType>(op.getInType());
+ });
+ target.addDynamicallyLegalOp<cuf::FreeOp>([](::cuf::FreeOp op) {
+ if (auto refTy = mlir::dyn_cast_or_null<fir::ReferenceType>(
+ op.getDevptr().getType())) {
+ return !mlir::isa<fir::BaseBoxType>(refTy.getEleTy());
+ }
+ return true;
+ });
+ target.addDynamicallyLegalOp<cuf::AllocateOp>(
+ [](::cuf::AllocateOp op) { return isBoxGlobal(op); });
+ target.addDynamicallyLegalOp<cuf::DeallocateOp>(
+ [](::cuf::DeallocateOp op) { return isBoxGlobal(op); });
patterns.insert<CufAllocOpConversion>(ctx, &*dl, &typeConverter);
patterns.insert<CufAllocateOpConversion, CufDeallocateOpConversion,
CufFreeOpConversion>(ctx);
diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir
index 1274d3921dd85..569e72f57d6d6 100644
--- a/flang/test/Fir/CUDA/cuda-allocate.fir
+++ b/flang/test/Fir/CUDA/cuda-allocate.fir
@@ -14,7 +14,6 @@ func.func @_QPsub1() {
return
}
-
// CHECK-LABEL: func.func @_QPsub1()
// CHECK: %[[DESC_RT_CALL:.*]] = fir.call @_FortranACUFAllocDesciptor(%{{.*}}, %{{.*}}, %{{.*}}) : (i64, !fir.ref<i8>, i32) -> !fir.ref<!fir.box<none>>
// CHECK: %[[DESC:.*]] = fir.convert %[[DESC_RT_CALL]] : (!fir.ref<!fir.box<none>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
@@ -27,4 +26,37 @@ func.func @_QPsub1() {
// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: fir.call @_FortranACUFFreeDesciptor(%[[BOX_NONE]], %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<i8>, i32) -> none
+// Check operations that should not be transformed yet.
+func.func @_QPsub2() {
+ %0 = cuf.alloc !fir.array<10xf32> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QMcuda_varFcuda_alloc_freeEa"} -> !fir.ref<!fir.array<10xf32>>
+ cuf.free %0 : !fir.ref<!fir.array<10xf32>> {data_attr = #cuf.cuda<device>}
+ return
}
+
+// CHECK-LABEL: func.func @_QPsub2()
+// CHECK: cuf.alloc !fir.array<10xf32>
+// CHECK: cuf.free %{{.*}} : !fir.ref<!fir.array<10xf32>>
+
+fir.global @_QMmod1Ea {data_attr = #cuf.cuda<device>} : !fir.box<!fir.heap<!fir.array<?xf32>>> {
+ %0 = fir.zero_bits !fir.heap<!fir.array<?xf32>>
+ %c0 = arith.constant 0 : index
+ %1 = fir.shape %c0 : (index) -> !fir.shape<1>
+ %2 = fir.embox %0(%1) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xf32>>>
+ fir.has_value %2 : !fir.box<!fir.heap<!fir.array<?xf32>>>
+}
+
+func.func @_QPsub3() {
+ %0 = fir.address_of(@_QMmod1Ea) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+ %1:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmod1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+ %2 = cuf.allocate %1#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
+ %3 = cuf.deallocate %1#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
+ return
+}
+
+// CHECK-LABEL: func.func @_QPsub3()
+// CHECK: cuf.allocate
+// CHECK: cuf.deallocate
+
+}
+
+
More information about the flang-commits
mailing list