[flang-commits] [flang] 9435edf - [flang][cuda] Lower DEALLOCATE for device variables (#89091)
via flang-commits
flang-commits at lists.llvm.org
Wed Apr 17 13:45:26 PDT 2024
Author: Valentin Clement (バレンタイン クレメン)
Date: 2024-04-17T13:45:22-07:00
New Revision: 9435edf628cb2011652897e1f10f7c55313d50be
URL: https://github.com/llvm/llvm-project/commit/9435edf628cb2011652897e1f10f7c55313d50be
DIFF: https://github.com/llvm/llvm-project/commit/9435edf628cb2011652897e1f10f7c55313d50be.diff
LOG: [flang][cuda] Lower DEALLOCATE for device variables (#89091)
Replace the runtime call to `AllocatableDeallocate` for CUDA device
variables with the newly added `fir.cuda_deallocate` operation.
This is similar to #88980.
A third patch will handle the case of automatic deallocation of device
allocatable variables.
Added:
Modified:
flang/lib/Lower/Allocatable.cpp
flang/test/Lower/CUDA/cuda-allocatable.cuf
Removed:
################################################################################
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 1d434d512d0c5c..38f61528d7e28a 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -799,6 +799,28 @@ static void postDeallocationAction(Fortran::lower::AbstractConverter &converter,
Fortran::lower::attachDeclarePostDeallocAction(converter, builder, sym);
}
+static mlir::Value genCudaDeallocate(fir::FirOpBuilder &builder,
+ mlir::Location loc,
+ const fir::MutableBoxValue &box,
+ ErrorManager &errorManager,
+ const Fortran::semantics::Symbol &sym) {
+ fir::CUDADataAttributeAttr cudaAttr =
+ Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(),
+ sym);
+ mlir::Value errmsg =
+ mlir::isa<fir::AbsentOp>(errorManager.errMsgAddr.getDefiningOp())
+ ? nullptr
+ : errorManager.errMsgAddr;
+
+ // Keep return type the same as a standard AllocatableDeallocate call.
+ mlir::Type retTy = fir::runtime::getModel<int>()(builder.getContext());
+ return builder
+ .create<fir::CUDADeallocateOp>(
+ loc, retTy, box.getAddr(), errmsg, cudaAttr,
+ errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr)
+ .getResult();
+}
+
// Generate deallocation of a pointer/allocatable.
static mlir::Value
genDeallocate(fir::FirOpBuilder &builder,
@@ -806,10 +828,11 @@ genDeallocate(fir::FirOpBuilder &builder,
const fir::MutableBoxValue &box, ErrorManager &errorManager,
mlir::Value declaredTypeDesc = {},
const Fortran::semantics::Symbol *symbol = nullptr) {
+ bool isCudaSymbol = symbol && Fortran::semantics::HasCUDAAttr(*symbol);
// Deallocate intrinsic types inline.
if (!box.isDerived() && !box.isPolymorphic() &&
!box.isUnlimitedPolymorphic() && !errorManager.hasStatSpec() &&
- !useAllocateRuntime && !box.isPointer()) {
+ !useAllocateRuntime && !box.isPointer() && !isCudaSymbol) {
// Pointers must use PointerDeallocate so that their deallocations
// can be validated.
mlir::Value ret = fir::factory::genFreemem(builder, loc, box);
@@ -820,8 +843,12 @@ genDeallocate(fir::FirOpBuilder &builder,
// Use runtime calls to deallocate descriptor cases. Sync MutableBoxValue
// with its descriptor before and after calls if needed.
errorManager.genStatCheck(builder, loc);
- mlir::Value stat =
- genRuntimeDeallocate(builder, loc, box, errorManager, declaredTypeDesc);
+ mlir::Value stat;
+ if (!isCudaSymbol)
+ stat =
+ genRuntimeDeallocate(builder, loc, box, errorManager, declaredTypeDesc);
+ else
+ stat = genCudaDeallocate(builder, loc, box, errorManager, *symbol);
fir::factory::syncMutableBoxFromIRBox(builder, loc, box);
if (symbol)
postDeallocationAction(converter, builder, *symbol);
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 55223011e8d9e9..5b10334ecdbc14 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -5,6 +5,8 @@
subroutine sub1()
real, allocatable, device :: a(:)
allocate(a(10))
+
+ deallocate(a)
end subroutine
! CHECK-LABEL: func.func @_QPsub1()
@@ -13,10 +15,14 @@ end subroutine
! CHECK: fir.call @_FortranAAllocatableSetBounds
! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
+! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
+
subroutine sub2()
real, allocatable, managed :: a(:)
integer :: istat
allocate(a(10), stat=istat)
+
+ deallocate(a, stat=istat)
end subroutine
! CHECK-LABEL: func.func @_QPsub2()
@@ -28,6 +34,9 @@ end subroutine
! CHECK: %[[STAT:.*]] = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>, hasStat} -> i32
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
+! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>, hasStat} -> i32
+! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
+
subroutine sub3()
integer, allocatable, pinned :: a(:,:)
logical :: plog
@@ -92,6 +101,8 @@ subroutine sub7()
integer :: istat
character(50) :: err
allocate(a(100), stat=istat, errmsg=err)
+
+ deallocate(a, stat=istat, errmsg=err)
end subroutine
! CHECK-LABEL: func.func @_QPsub7()
@@ -105,3 +116,7 @@ end subroutine
! CHECK: fir.call @_FortranAAllocatableSetBounds
! CHECK: %[[STAT:.*]] = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%[[ERR_BOX]] : !fir.box<!fir.char<1,50>>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
+
+! CHECK: %[[ERR_BOX:.*]] = fir.embox %[[ERR_DECL]]#1 : (!fir.ref<!fir.char<1,50>>) -> !fir.box<!fir.char<1,50>>
+! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%15 : !fir.box<!fir.char<1,50>>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
+! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
More information about the flang-commits
mailing list