[flang-commits] [flang] 9435edf - [flang][cuda] Lower DEALLOCATE for device variables (#89091)

Wed Apr 17 13:45:26 PDT 2024

Author: Valentin Clement (バレンタイン クレメン)
Date: 2024-04-17T13:45:22-07:00
New Revision: 9435edf628cb2011652897e1f10f7c55313d50be

URL: https://github.com/llvm/llvm-project/commit/9435edf628cb2011652897e1f10f7c55313d50be
DIFF: https://github.com/llvm/llvm-project/commit/9435edf628cb2011652897e1f10f7c55313d50be.diff

LOG: [flang][cuda] Lower DEALLOCATE for device variables (#89091)

Replace the runtime call to `AllocatableDeallocate` for CUDA device
variable to the newly added `fir.cuda_deallocate` operation.

This is similar with #88980 

A third patch will handle the case of automatic dealloctaion of device
allocatable variables

Added: 
    

Modified: 
    flang/lib/Lower/Allocatable.cpp
    flang/test/Lower/CUDA/cuda-allocatable.cuf

Removed: 
    


################################################################################
diff  --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 1d434d512d0c5c..38f61528d7e28a 100644

--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -799,6 +799,28 @@ static void postDeallocationAction(Fortran::lower::AbstractConverter &converter,
     Fortran::lower::attachDeclarePostDeallocAction(converter, builder, sym);
 }
 
+static mlir::Value genCudaDeallocate(fir::FirOpBuilder &builder,
+                                     mlir::Location loc,
+                                     const fir::MutableBoxValue &box,
+                                     ErrorManager &errorManager,
+                                     const Fortran::semantics::Symbol &sym) {
+  fir::CUDADataAttributeAttr cudaAttr =
+      Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(),
+                                                       sym);
+  mlir::Value errmsg =
+      mlir::isa<fir::AbsentOp>(errorManager.errMsgAddr.getDefiningOp())
+          ? nullptr
+          : errorManager.errMsgAddr;
+
+  // Keep return type the same as a standard AllocatableAllocate call.
+  mlir::Type retTy = fir::runtime::getModel<int>()(builder.getContext());
+  return builder
+      .create<fir::CUDADeallocateOp>(
+          loc, retTy, box.getAddr(), errmsg, cudaAttr,
+          errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr)
+      .getResult();
+}
+
 // Generate deallocation of a pointer/allocatable.
 static mlir::Value
 genDeallocate(fir::FirOpBuilder &builder,
@@ -806,10 +828,11 @@ genDeallocate(fir::FirOpBuilder &builder,
               const fir::MutableBoxValue &box, ErrorManager &errorManager,
               mlir::Value declaredTypeDesc = {},
               const Fortran::semantics::Symbol *symbol = nullptr) {
+  bool isCudaSymbol = symbol && Fortran::semantics::HasCUDAAttr(*symbol);
   // Deallocate intrinsic types inline.
   if (!box.isDerived() && !box.isPolymorphic() &&
       !box.isUnlimitedPolymorphic() && !errorManager.hasStatSpec() &&
-      !useAllocateRuntime && !box.isPointer()) {
+      !useAllocateRuntime && !box.isPointer() && !isCudaSymbol) {
     // Pointers must use PointerDeallocate so that their deallocations
     // can be validated.
     mlir::Value ret = fir::factory::genFreemem(builder, loc, box);
@@ -820,8 +843,12 @@ genDeallocate(fir::FirOpBuilder &builder,
   // Use runtime calls to deallocate descriptor cases. Sync MutableBoxValue
   // with its descriptor before and after calls if needed.
   errorManager.genStatCheck(builder, loc);
-  mlir::Value stat =
-      genRuntimeDeallocate(builder, loc, box, errorManager, declaredTypeDesc);
+  mlir::Value stat;
+  if (!isCudaSymbol)
+    stat =
+        genRuntimeDeallocate(builder, loc, box, errorManager, declaredTypeDesc);
+  else
+    stat = genCudaDeallocate(builder, loc, box, errorManager, *symbol);
   fir::factory::syncMutableBoxFromIRBox(builder, loc, box);
   if (symbol)
     postDeallocationAction(converter, builder, *symbol);

diff  --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 55223011e8d9e9..5b10334ecdbc14 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -5,6 +5,8 @@
 subroutine sub1()
   real, allocatable, device :: a(:)
   allocate(a(10))
+
+  deallocate(a)
 end subroutine
 
 ! CHECK-LABEL: func.func @_QPsub1()
@@ -13,10 +15,14 @@ end subroutine
 ! CHECK: fir.call @_FortranAAllocatableSetBounds
 ! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
 
+! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
+
 subroutine sub2()
   real, allocatable, managed :: a(:)
   integer :: istat
   allocate(a(10), stat=istat)
+
+  deallocate(a, stat=istat)
 end subroutine
 
 ! CHECK-LABEL: func.func @_QPsub2()
@@ -28,6 +34,9 @@ end subroutine
 ! CHECK: %[[STAT:.*]] = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>, hasStat} -> i32
 ! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
 
+! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>, hasStat} -> i32
+! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
+
 subroutine sub3()
   integer, allocatable, pinned :: a(:,:)
   logical :: plog
@@ -92,6 +101,8 @@ subroutine sub7()
   integer :: istat
   character(50) :: err
   allocate(a(100), stat=istat, errmsg=err)
+
+  deallocate(a, stat=istat, errmsg=err)
 end subroutine
 
 ! CHECK-LABEL: func.func @_QPsub7()
@@ -105,3 +116,7 @@ end subroutine
 ! CHECK: fir.call @_FortranAAllocatableSetBounds
 ! CHECK: %[[STAT:.*]] = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%[[ERR_BOX]] : !fir.box<!fir.char<1,50>>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
 ! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
+
+! CHECK: %[[ERR_BOX:.*]] = fir.embox %[[ERR_DECL]]#1 : (!fir.ref<!fir.char<1,50>>) -> !fir.box<!fir.char<1,50>>
+! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%15 : !fir.box<!fir.char<1,50>>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
+! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>