[flang-commits] [flang] [flang][cuda] Avoid generating cuf.data_transfer in OpenACC region (PR #106435)

Valentin Clement バレンタイン クレメン via flang-commits flang-commits at lists.llvm.org
Thu Aug 29 08:52:57 PDT 2024


https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/106435

>From 97571f0630bb2b80093419a78ef09a7f4d979fc8 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Wed, 28 Aug 2024 11:45:37 -0700
Subject: [PATCH 1/2] [flang][cuda] Avoid generating cuf.data_transfer in
 OpenACC region

---
 flang/lib/Lower/Bridge.cpp                   | 15 ++++++-
 flang/test/Lower/CUDA/cuda-data-transfer.cuf | 44 +++++++++++++++++++-
 2 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index c48daba8cf7fab..5e7d5fbcc2173e 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -4380,9 +4380,19 @@ class FirConverter : public Fortran::lower::AbstractConverter {
   // Check if the insertion point is currently in a device context. HostDevice
   // subprogram are not considered fully device context so it will return false
   // for it.
-  static bool isDeviceContext(fir::FirOpBuilder &builder) {
+  // If the insertion point is inside an OpenACC region op, it is considered
+  // device context.
+  static bool isCudaDeviceContext(fir::FirOpBuilder &builder) {
     if (builder.getRegion().getParentOfType<cuf::KernelOp>())
       return true;
+    if (builder.getRegion().getParentOfType<mlir::acc::LoopOp>())
+      return true;
+    if (builder.getRegion().getParentOfType<mlir::acc::KernelsOp>())
+      return true;
+    if (builder.getRegion().getParentOfType<mlir::acc::ParallelOp>())
+      return true;
+    if (builder.getRegion().getParentOfType<mlir::acc::SerialOp>())
+      return true;
     if (auto funcOp =
             builder.getRegion().getParentOfType<mlir::func::FuncOp>()) {
       if (auto cudaProcAttr =
@@ -4401,7 +4411,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     mlir::Location loc = getCurrentLocation();
     fir::FirOpBuilder &builder = getFirOpBuilder();
 
-    bool isInDeviceContext = isDeviceContext(builder);
+    bool isInDeviceContext = isCudaDeviceContext(builder);
+
     bool isCUDATransfer = (Fortran::evaluate::HasCUDADeviceAttrs(assign.lhs) ||
                            Fortran::evaluate::HasCUDADeviceAttrs(assign.rhs)) &&
                           !isInDeviceContext;
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index 42b37fb89e4ce2..f189bf9b621082 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -1,4 +1,4 @@
-! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenacc -fcuda %s -o - | FileCheck %s
 
 ! Test CUDA Fortran data transfer using assignment statements.
 
@@ -290,3 +290,45 @@ end subroutine
 ! CHECK: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1>
 ! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %[[ARG1]](%{{.*}}) dummy_scope %{{.*}} {uniq_name = "_QFsub15Ea_host"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
 ! CHECK: cuf.data_transfer %[[AHOST]]#1 to %[[ADEV]]#1, %[[SHAPE]] : !fir.shape<1> {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>
+
+! Check that cuf.data_transfer ops are not generated within OpenACC regions
+subroutine sub16()
+  integer, parameter :: n = 10
+  real, device :: adev(n)
+  real :: ahost(n)
+  real, managed :: b
+  integer :: i
+
+  adev = ahost
+  !$acc parallel loop deviceptr(adev) 
+  do i = 1, n
+    adev(i) = adev(i) + b
+  enddo
+
+  !$acc kernels deviceptr(adev) 
+  do i = 1, n
+    adev(i) = adev(i) + b
+  enddo
+  !$acc end kernels
+
+
+  !$acc serial deviceptr(adev) 
+  do i = 1, n
+    adev(i) = adev(i) + b
+  enddo
+  !$acc end serial
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub16()
+! CHECK: cuf.data_transfer
+! CHECK: acc.parallel
+! CHECK-NOT: cuf.data_transfer
+! CHECK: hlfir.assign
+
+! CHECK: acc.kernels
+! CHECK-NOT: cuf.data_transfer
+! CHECK: hlfir.assign
+
+! CHECK: acc.serial
+! CHECK-NOT: cuf.data_transfer
+! CHECK: hlfir.assign

>From bc6df359a8a773277890d5e96960f2321acc42d3 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Thu, 29 Aug 2024 08:52:25 -0700
Subject: [PATCH 2/2] Use ComputeRegionOpInterface

---
 flang/lib/Lower/Bridge.cpp | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 5e7d5fbcc2173e..078e17bea55859 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -4385,13 +4385,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
   static bool isCudaDeviceContext(fir::FirOpBuilder &builder) {
     if (builder.getRegion().getParentOfType<cuf::KernelOp>())
       return true;
-    if (builder.getRegion().getParentOfType<mlir::acc::LoopOp>())
-      return true;
-    if (builder.getRegion().getParentOfType<mlir::acc::KernelsOp>())
-      return true;
-    if (builder.getRegion().getParentOfType<mlir::acc::ParallelOp>())
-      return true;
-    if (builder.getRegion().getParentOfType<mlir::acc::SerialOp>())
+    if (builder.getRegion()
+            .getParentOfType<mlir::acc::ComputeRegionOpInterface>())
       return true;
     if (auto funcOp =
             builder.getRegion().getParentOfType<mlir::func::FuncOp>()) {



More information about the flang-commits mailing list