[flang-commits] [flang] [flang][cuda] Do inline allocation/deallocation in device code (PR #106628)
via flang-commits
flang-commits at lists.llvm.org
Thu Aug 29 14:07:51 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-flang-fir-hlfir
Author: Valentin Clement (バレンタイン クレメン) (clementval)
<details>
<summary>Changes</summary>
ALLOCATE and DEALLOCATE statements can be inlined in device functions. This patch updates the condition that determines whether to inline these actions during lowering.
---
Full diff: https://github.com/llvm/llvm-project/pull/106628.diff
3 Files Affected:
- (modified) flang/lib/Lower/Allocatable.cpp (+19-8)
- (modified) flang/lib/Lower/Bridge.cpp (+2-24)
- (modified) flang/test/Lower/CUDA/cuda-allocatable.cuf (+17)
``````````diff
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index d4d999f5c84a09..fb8380ac7e8c51 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -15,6 +15,7 @@
#include "flang/Lower/AbstractConverter.h"
#include "flang/Lower/ConvertType.h"
#include "flang/Lower/ConvertVariable.h"
+#include "flang/Lower/Cuda.h"
#include "flang/Lower/IterationSpace.h"
#include "flang/Lower/Mangler.h"
#include "flang/Lower/OpenACC.h"
@@ -453,16 +454,22 @@ class AllocateStmtHelper {
void genSimpleAllocation(const Allocation &alloc,
const fir::MutableBoxValue &box) {
- if (!box.isDerived() && !errorManager.hasStatSpec() &&
- !alloc.type.IsPolymorphic() && !alloc.hasCoarraySpec() &&
- !useAllocateRuntime && !box.isPointer() &&
- !Fortran::semantics::HasCUDAAttr(alloc.getSymbol())) {
+ bool isCudaSymbol = Fortran::semantics::HasCUDAAttr(alloc.getSymbol());
+ bool isCudaDeviceContext = Fortran::lower::isCudaDeviceContext(builder);
+ bool inlineAllocation = !box.isDerived() && !errorManager.hasStatSpec() &&
+ !alloc.type.IsPolymorphic() &&
+ !alloc.hasCoarraySpec() && !useAllocateRuntime &&
+ !box.isPointer();
+
+ if (inlineAllocation &&
+ ((isCudaSymbol && isCudaDeviceContext) || !isCudaSymbol)) {
// Pointers must use PointerAllocate so that their deallocations
// can be validated.
genInlinedAllocation(alloc, box);
postAllocationAction(alloc);
return;
}
+
// Generate a sequence of runtime calls.
errorManager.genStatCheck(builder, loc);
genAllocateObjectInit(box);
@@ -473,7 +480,7 @@ class AllocateStmtHelper {
genSetDeferredLengthParameters(alloc, box);
genAllocateObjectBounds(alloc, box);
mlir::Value stat;
- if (!Fortran::semantics::HasCUDAAttr(alloc.getSymbol()))
+ if (!isCudaSymbol)
stat = genRuntimeAllocate(builder, loc, box, errorManager);
else
stat =
@@ -830,10 +837,14 @@ genDeallocate(fir::FirOpBuilder &builder,
mlir::Value declaredTypeDesc = {},
const Fortran::semantics::Symbol *symbol = nullptr) {
bool isCudaSymbol = symbol && Fortran::semantics::HasCUDAAttr(*symbol);
- // Deallocate intrinsic types inline.
- if (!box.isDerived() && !box.isPolymorphic() && !box.hasAssumedRank() &&
+ bool isCudaDeviceContext = Fortran::lower::isCudaDeviceContext(builder);
+ bool inlineDeallocation =
+ !box.isDerived() && !box.isPolymorphic() && !box.hasAssumedRank() &&
!box.isUnlimitedPolymorphic() && !errorManager.hasStatSpec() &&
- !useAllocateRuntime && !box.isPointer() && !isCudaSymbol) {
+ !useAllocateRuntime && !box.isPointer();
+ // Deallocate intrinsic types inline.
+ if (inlineDeallocation &&
+ ((isCudaSymbol && isCudaDeviceContext) || !isCudaSymbol)) {
// Pointers must use PointerDeallocate so that their deallocations
// can be validated.
mlir::Value ret = fir::factory::genFreemem(builder, loc, box);
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 078e17bea55859..90943fa92493ce 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -20,6 +20,7 @@
#include "flang/Lower/ConvertExprToHLFIR.h"
#include "flang/Lower/ConvertType.h"
#include "flang/Lower/ConvertVariable.h"
+#include "flang/Lower/Cuda.h"
#include "flang/Lower/HostAssociations.h"
#include "flang/Lower/IO.h"
#include "flang/Lower/IterationSpace.h"
@@ -4377,36 +4378,13 @@ class FirConverter : public Fortran::lower::AbstractConverter {
return temps;
}
- // Check if the insertion point is currently in a device context. HostDevice
- // subprogram are not considered fully device context so it will return false
- // for it.
- // If the insertion point is inside an OpenACC region op, it is considered
- // device context.
- static bool isCudaDeviceContext(fir::FirOpBuilder &builder) {
- if (builder.getRegion().getParentOfType<cuf::KernelOp>())
- return true;
- if (builder.getRegion()
- .getParentOfType<mlir::acc::ComputeRegionOpInterface>())
- return true;
- if (auto funcOp =
- builder.getRegion().getParentOfType<mlir::func::FuncOp>()) {
- if (auto cudaProcAttr =
- funcOp.getOperation()->getAttrOfType<cuf::ProcAttributeAttr>(
- cuf::getProcAttrName())) {
- return cudaProcAttr.getValue() != cuf::ProcAttribute::Host &&
- cudaProcAttr.getValue() != cuf::ProcAttribute::HostDevice;
- }
- }
- return false;
- }
-
void genDataAssignment(
const Fortran::evaluate::Assignment &assign,
const Fortran::evaluate::ProcedureRef *userDefinedAssignment) {
mlir::Location loc = getCurrentLocation();
fir::FirOpBuilder &builder = getFirOpBuilder();
- bool isInDeviceContext = isCudaDeviceContext(builder);
+ bool isInDeviceContext = Fortran::lower::isCudaDeviceContext(builder);
bool isCUDATransfer = (Fortran::evaluate::HasCUDADeviceAttrs(assign.lhs) ||
Fortran::evaluate::HasCUDADeviceAttrs(assign.rhs)) &&
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index cb6ca9af334fc5..fb72f88fe415ca 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -164,3 +164,20 @@ end subroutine
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
! CHECK: }
! CHECK: cuf.free %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>}
+
+attributes(global) subroutine sub8()
+ real, device, allocatable :: a(:)
+ allocate(a(2))
+ deallocate(a)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub8() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
+! CHECK: %[[DESC:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub8Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: %[[A:.*]]:2 = hlfir.declare %[[DESC]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub8Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+! CHECK: %[[HEAP:.*]] = fir.allocmem !fir.array<?xf32>, %{{.*}} {fir.must_be_heap = true, uniq_name = "_QFsub8Ea.alloc"}
+! CHECK: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1>
+! CHECK: %[[EMBOX:.*]] = fir.embox %[[HEAP]](%[[SHAPE]]) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xf32>>>
+! CHECK: fir.store %[[EMBOX]] to %[[A]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: %[[BOX:.*]] = fir.load %[[A]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: %[[BOXADDR:.*]] = fir.box_addr %[[BOX]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
+! CHECK: fir.freemem %[[BOXADDR]] : !fir.heap<!fir.array<?xf32>>
``````````
</details>
https://github.com/llvm/llvm-project/pull/106628
More information about the flang-commits
mailing list