[flang-commits] [flang] [flang][cuda] Make sure to issue freemem for the allocated temp (PR #98078)

Valentin Clement バレンタイン クレメン via flang-commits flang-commits at lists.llvm.org
Mon Jul 8 14:14:51 PDT 2024


https://github.com/clementval created https://github.com/llvm/llvm-project/pull/98078

When an implicit data transfer is created, make sure the `freemem` op is generated on the `allocmem` result value and not on the declare op value.

From 50af4a2a0d8e2c5403d990154d1ed1d1a878da8a Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 8 Jul 2024 14:12:08 -0700
Subject: [PATCH] [flang][cuda] Make sure to issue freemem for the allocated
 temp

---
 flang/lib/Lower/Bridge.cpp                   |  9 +++++++--
 flang/test/Lower/CUDA/cuda-data-transfer.cuf | 17 ++++++++++++++---
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 60422dd336762..60845f706defe 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -4294,8 +4294,13 @@ class FirConverter : public Fortran::lower::AbstractConverter {
           auto [temp, cleanup] =
               hlfir::createTempFromMold(loc, builder, entity);
           auto needCleanup = fir::getIntIfConstant(cleanup);
-          if (needCleanup && *needCleanup)
-            temps.push_back(temp);
+          if (needCleanup && *needCleanup) {
+            if (auto declareOp =
+                    mlir::dyn_cast<hlfir::DeclareOp>(temp.getDefiningOp()))
+              temps.push_back(declareOp.getMemref());
+            else
+              temps.push_back(temp);
+          }
           addSymbol(sym,
                     hlfir::translateToExtendedValue(loc, builder, temp).first,
                     /*forced=*/true);
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index 5dbf39c58c449..874c31c580719 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -103,7 +103,7 @@ end
 ! CHECK: cuf.data_transfer %[[ADEV]]#1 to %[[DECL_TEMP]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.array<10xi32>>, !fir.heap<!fir.array<10xi32>>
 ! CHECK: %[[ELEMENTAL:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<10xi32>
 ! CHECK: hlfir.assign %[[ELEMENTAL]] to %[[BHOST]]#0 : !hlfir.expr<10xi32>, !fir.ref<!fir.array<10xi32>>
-! CHECK: fir.freemem %[[DECL_TEMP]]#0 : !fir.heap<!fir.array<10xi32>>
+! CHECK: fir.freemem %[[TEMP]] : !fir.heap<!fir.array<10xi32>>
 
 subroutine sub3()
   use mod1
@@ -213,8 +213,6 @@ subroutine sub10(a, b)
   res = a + b
 end subroutine
 
-
-
 ! CHECK-LABEL: func.func @_QPsub10(
 ! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<i32> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "a"}
 
@@ -222,3 +220,16 @@ end subroutine
 ! CHECK: cuf.data_transfer %[[A]]#1 to %{{.*}}#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<i32>, !fir.ref<i32>
 ! CHECK-NOT: cuf.data_transfer
 
+subroutine sub11(a, b, n)
+  integer :: n
+  integer :: a(n)
+  integer, allocatable, device :: b(:)
+  integer :: res(10)
+
+  res = a + b
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub11
+! CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<?xi32>, %14#1 {bindc_name = ".tmp", uniq_name = ""}
+! CHECK: cuf.data_transfer
+! CHECK: fir.freemem %[[TEMP]] : !fir.heap<!fir.array<?xi32>>



More information about the flang-commits mailing list