[flang-commits] [flang] [flang][cuda] Use cuf.alloc/cuf.free for local descriptor (PR #98518)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Thu Jul 11 13:23:01 PDT 2024
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/98518
>From 305390953742920a943bb48187f35d15c2148a1d Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Thu, 11 Jul 2024 11:51:43 -0700
Subject: [PATCH 1/3] [flang][cuda] Use cuf.alloc/cuf.free for local descriptor
holding cuda data
---
flang/include/flang/Semantics/tools.h | 8 ++------
flang/lib/Lower/ConvertVariable.cpp | 8 ++++++--
flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp | 6 ++++--
flang/test/Fir/cuf-invalid.fir | 11 +----------
flang/test/Lower/CUDA/cuda-allocatable.cuf | 14 +++++++-------
5 files changed, 20 insertions(+), 27 deletions(-)
diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h
index 0b5308d9242de..0fcba3131fad1 100644
--- a/flang/include/flang/Semantics/tools.h
+++ b/flang/include/flang/Semantics/tools.h
@@ -222,7 +222,6 @@ inline bool HasCUDAAttr(const Symbol &sym) {
}
inline bool NeedCUDAAlloc(const Symbol &sym) {
- bool inDeviceSubprogram{IsCUDADeviceContext(&sym.owner())};
if (IsDummy(sym)) {
return false;
}
@@ -230,11 +229,8 @@ inline bool NeedCUDAAlloc(const Symbol &sym) {
if (details->cudaDataAttr() &&
(*details->cudaDataAttr() == common::CUDADataAttr::Device ||
*details->cudaDataAttr() == common::CUDADataAttr::Managed ||
- *details->cudaDataAttr() == common::CUDADataAttr::Unified)) {
- // Descriptor is allocated on host when in host context.
- if (IsAllocatable(sym)) {
- return inDeviceSubprogram;
- }
+ *details->cudaDataAttr() == common::CUDADataAttr::Unified ||
+ *details->cudaDataAttr() == common::CUDADataAttr::Pinned)) {
return true;
}
}
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index e5a71c5ec5b4a..b239b70e7f5c1 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -715,8 +715,12 @@ static mlir::Value createNewLocal(Fortran::lower::AbstractConverter &converter,
auto idxTy = builder.getIndexType();
for (mlir::Value sh : elidedShape)
indices.push_back(builder.createConvert(loc, idxTy, sh));
- return builder.create<cuf::AllocOp>(loc, ty, nm, symNm, dataAttr, lenParams,
- indices);
+ mlir::Value alloc = builder.create<cuf::AllocOp>(
+ loc, ty, nm, symNm, dataAttr, lenParams, indices);
+ converter.getFctCtx().attachCleanup([&builder, loc, alloc, dataAttr]() {
+ builder.create<cuf::FreeOp>(loc, alloc, dataAttr);
+ });
+ return alloc;
}
// Let the builder do all the heavy lifting.
diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
index 53092bed5720b..f7b36b208a7de 100644
--- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
+++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
@@ -54,9 +54,11 @@ template <typename Op>
static llvm::LogicalResult checkCudaAttr(Op op) {
if (op.getDataAttr() == cuf::DataAttribute::Device ||
op.getDataAttr() == cuf::DataAttribute::Managed ||
- op.getDataAttr() == cuf::DataAttribute::Unified)
+ op.getDataAttr() == cuf::DataAttribute::Unified ||
+ op.getDataAttr() == cuf::DataAttribute::Pinned)
return mlir::success();
- return op.emitOpError("expect device, managed or unified cuda attribute");
+ return op.emitOpError()
+ << "expect device, managed, pinned or unified cuda attribute";
}
llvm::LogicalResult cuf::AllocOp::verify() { return checkCudaAttr(*this); }
diff --git a/flang/test/Fir/cuf-invalid.fir b/flang/test/Fir/cuf-invalid.fir
index 6e18e48ac82fc..06e08d14b2435 100644
--- a/flang/test/Fir/cuf-invalid.fir
+++ b/flang/test/Fir/cuf-invalid.fir
@@ -88,18 +88,9 @@ func.func @_QPsub1() {
// -----
-func.func @_QPsub1() {
- // expected-error at +1{{'cuf.alloc' op expect device, managed or unified cuda attribute}}
- %0 = cuf.alloc f32 {bindc_name = "r", data_attr = #cuf.cuda<pinned>, uniq_name = "_QFsub1Er"} -> !fir.ref<f32>
- cuf.free %0 : !fir.ref<f32> {data_attr = #cuf.cuda<constant>}
- return
-}
-
-// -----
-
func.func @_QPsub1() {
%0 = cuf.alloc f32 {bindc_name = "r", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Er"} -> !fir.ref<f32>
- // expected-error at +1{{'cuf.free' op expect device, managed or unified cuda attribute}}
+ // expected-error at +1{{'cuf.free' op expect device, managed, pinned or unified cuda attribute}}
cuf.free %0 : !fir.ref<f32> {data_attr = #cuf.cuda<constant>}
return
}
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 74a3ec100a8f2..1e75b45a38dfa 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -10,7 +10,7 @@ subroutine sub1()
end subroutine
! CHECK-LABEL: func.func @_QPsub1()
-! CHECK: %[[BOX:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
+! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: fir.call @_FortranAAllocatableSetBounds
! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
@@ -35,7 +35,7 @@ subroutine sub2()
end subroutine
! CHECK-LABEL: func.func @_QPsub2()
-! CHECK: %[[BOX:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub2Ea"}
+! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<managed>, uniq_name = "_QFsub2Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub2Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[ISTAT:.*]] = fir.alloca i32 {bindc_name = "istat", uniq_name = "_QFsub2Eistat"}
! CHECK: %[[ISTAT_DECL:.*]]:2 = hlfir.declare %[[ISTAT]] {uniq_name = "_QFsub2Eistat"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -57,7 +57,7 @@ subroutine sub3()
end subroutine
! CHECK-LABEL: func.func @_QPsub3()
-! CHECK: %[[BOX:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?x?xi32>>> {bindc_name = "a", uniq_name = "_QFsub3Ea"}
+! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?x?xi32>>> {bindc_name = "a", data_attr = #cuf.cuda<pinned>, uniq_name = "_QFsub3Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub3Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>)
! CHECK: %[[PLOG:.*]] = fir.alloca !fir.logical<4> {bindc_name = "plog", uniq_name = "_QFsub3Eplog"}
! CHECK: %[[PLOG_DECL:.*]]:2 = hlfir.declare %5 {uniq_name = "_QFsub3Eplog"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
@@ -74,7 +74,7 @@ subroutine sub4()
end subroutine
! CHECK-LABEL: func.func @_QPsub4()
-! CHECK: %[[BOX:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub4Ea"}
+! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub4Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub4Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[ISTREAM:.*]] = fir.alloca i32 {bindc_name = "istream", uniq_name = "_QFsub4Eistream"}
! CHECK: %[[ISTREAM_DECL:.*]]:2 = hlfir.declare %[[ISTREAM]] {uniq_name = "_QFsub4Eistream"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -92,7 +92,7 @@ subroutine sub5()
end subroutine
! CHECK-LABEL: func.func @_QPsub5()
-! CHECK: %[[BOX_A:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub5Ea"}
+! CHECK: %[[BOX_A:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub5Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub5Eb"}
! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Eb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
@@ -112,7 +112,7 @@ subroutine sub6()
end subroutine
! CHECK-LABEL: func.func @_QPsub6()
-! CHECK: %[[BOX_A:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub6Ea"}
+! CHECK: %[[BOX_A:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub6Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub6Eb"}
! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Eb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
@@ -133,7 +133,7 @@ subroutine sub7()
end subroutine
! CHECK-LABEL: func.func @_QPsub7()
-! CHECK: %[[BOX:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub7Ea"}
+! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub7Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub7Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[ERR:.*]] = fir.alloca !fir.char<1,50> {bindc_name = "err", uniq_name = "_QFsub7Eerr"}
! CHECK: %[[ERR_DECL:.*]]:2 = hlfir.declare %[[ERR]] typeparams %{{.*}} {uniq_name = "_QFsub7Eerr"} : (!fir.ref<!fir.char<1,50>>, index) -> (!fir.ref<!fir.char<1,50>>, !fir.ref<!fir.char<1,50>>)
>From 4b0b77fcabe2bcc17fc34f45282a7b12fcf605e9 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Thu, 11 Jul 2024 13:14:05 -0700
Subject: [PATCH 2/3] Make sure cuf.free is placed after conditional
deallocation of the data
---
flang/lib/Lower/ConvertVariable.cpp | 33 +++++++++++-----------
flang/test/Lower/CUDA/cuda-allocatable.cuf | 7 +++++
2 files changed, 24 insertions(+), 16 deletions(-)
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index b239b70e7f5c1..3156e64112972 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -717,9 +717,9 @@ static mlir::Value createNewLocal(Fortran::lower::AbstractConverter &converter,
indices.push_back(builder.createConvert(loc, idxTy, sh));
mlir::Value alloc = builder.create<cuf::AllocOp>(
loc, ty, nm, symNm, dataAttr, lenParams, indices);
- converter.getFctCtx().attachCleanup([&builder, loc, alloc, dataAttr]() {
- builder.create<cuf::FreeOp>(loc, alloc, dataAttr);
- });
+ // converter.getFctCtx().attachCleanup([&builder, loc, alloc, dataAttr]() {
+ // builder.create<cuf::FreeOp>(loc, alloc, dataAttr);
+ // });
return alloc;
}
@@ -931,6 +931,19 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter,
finalizeAtRuntime(converter, var, symMap);
if (mustBeDefaultInitializedAtRuntime(var))
defaultInitializeAtRuntime(converter, var, symMap);
+ if (Fortran::semantics::NeedCUDAAlloc(var.getSymbol())) {
+ auto *builder = &converter.getFirOpBuilder();
+ mlir::Location loc = converter.getCurrentLocation();
+ fir::ExtendedValue exv =
+ converter.getSymbolExtendedValue(var.getSymbol(), &symMap);
+ auto *sym = &var.getSymbol();
+ converter.getFctCtx().attachCleanup([builder, loc, exv, sym]() {
+ cuf::DataAttributeAttr dataAttr =
+ Fortran::lower::translateSymbolCUFDataAttribute(builder->getContext(),
+ *sym);
+ builder->create<cuf::FreeOp>(loc, fir::getBase(exv), dataAttr);
+ });
+ }
if (std::optional<VariableCleanUp> cleanup =
needDeallocationOrFinalization(var)) {
auto *builder = &converter.getFirOpBuilder();
@@ -954,22 +967,10 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter,
"trying to deallocate entity not lowered as allocatable");
Fortran::lower::genDeallocateIfAllocated(*converterPtr, *mutableBox,
loc, sym);
+
});
}
}
- if (Fortran::semantics::NeedCUDAAlloc(var.getSymbol())) {
- auto *builder = &converter.getFirOpBuilder();
- mlir::Location loc = converter.getCurrentLocation();
- fir::ExtendedValue exv =
- converter.getSymbolExtendedValue(var.getSymbol(), &symMap);
- auto *sym = &var.getSymbol();
- converter.getFctCtx().attachCleanup([builder, loc, exv, sym]() {
- cuf::DataAttributeAttr dataAttr =
- Fortran::lower::translateSymbolCUFDataAttribute(builder->getContext(),
- *sym);
- builder->create<cuf::FreeOp>(loc, fir::getBase(exv), dataAttr);
- });
- }
}
//===----------------------------------------------------------------===//
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 1e75b45a38dfa..82c1063507535 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -25,6 +25,7 @@ end subroutine
! CHECK: fir.if %[[NE_C0]] {
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
! CHECK: }
+! CHECK: cuf.free %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>}
subroutine sub2()
real, allocatable, managed :: a(:)
@@ -49,6 +50,7 @@ end subroutine
! CHECK: fir.if %{{.*}} {
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<managed>} -> i32
! CHECK: }
+! CHECK: cuf.free %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<managed>}
subroutine sub3()
integer, allocatable, pinned :: a(:,:)
@@ -66,6 +68,7 @@ end subroutine
! CHECK: fir.if %{{.*}} {
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>> {data_attr = #cuf.cuda<pinned>} -> i32
! CHECK: }
+! CHECK: cuf.free %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>> {data_attr = #cuf.cuda<pinned>}
subroutine sub4()
real, allocatable, device :: a(:)
@@ -84,6 +87,7 @@ end subroutine
! CHECK: fir.if %{{.*}} {
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
! CHECK: }
+! CHECK: cuf.free %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>}
subroutine sub5()
real, allocatable, device :: a(:)
@@ -104,6 +108,7 @@ end subroutine
! CHECK: fir.if %{{.*}} {
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
! CHECK: }
+! CHECK: cuf.free %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>}
subroutine sub6()
real, allocatable, device :: a(:)
@@ -122,6 +127,7 @@ end subroutine
! CHECK: fir.if %{{.*}} {
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
! CHECK: }
+! CHECK: cuf.free %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>}
subroutine sub7()
real, allocatable, device :: a(:)
@@ -150,3 +156,4 @@ end subroutine
! CHECK: fir.if %{{.*}} {
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
! CHECK: }
+! CHECK: cuf.free %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>}
>From 3fc607c9e5da968d470f5c76beccb6e4f3087f77 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Thu, 11 Jul 2024 13:22:50 -0700
Subject: [PATCH 3/3] Remove commented code
---
flang/lib/Lower/ConvertVariable.cpp | 3 ---
1 file changed, 3 deletions(-)
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index 3156e64112972..47ad48fb322cc 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -717,9 +717,6 @@ static mlir::Value createNewLocal(Fortran::lower::AbstractConverter &converter,
indices.push_back(builder.createConvert(loc, idxTy, sh));
mlir::Value alloc = builder.create<cuf::AllocOp>(
loc, ty, nm, symNm, dataAttr, lenParams, indices);
- // converter.getFctCtx().attachCleanup([&builder, loc, alloc, dataAttr]() {
- // builder.create<cuf::FreeOp>(loc, alloc, dataAttr);
- // });
return alloc;
}
More information about the flang-commits
mailing list