[flang-commits] [flang] [flang][cuda] Avoid generating cuf.data_transfer in OpenACC region (PR #106435)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Thu Aug 29 08:52:57 PDT 2024
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/106435
>From 97571f0630bb2b80093419a78ef09a7f4d979fc8 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Wed, 28 Aug 2024 11:45:37 -0700
Subject: [PATCH 1/2] [flang][cuda] Avoid generating cuf.data_transfer in
OpenACC region
---
flang/lib/Lower/Bridge.cpp | 15 ++++++-
flang/test/Lower/CUDA/cuda-data-transfer.cuf | 44 +++++++++++++++++++-
2 files changed, 56 insertions(+), 3 deletions(-)
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index c48daba8cf7fab..5e7d5fbcc2173e 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -4380,9 +4380,19 @@ class FirConverter : public Fortran::lower::AbstractConverter {
// Check if the insertion point is currently in a device context. HostDevice
// subprogram are not considered fully device context so it will return false
// for it.
- static bool isDeviceContext(fir::FirOpBuilder &builder) {
+ // If the insertion point is inside an OpenACC region op, it is also
+ // considered to be in a device context.
+ static bool isCudaDeviceContext(fir::FirOpBuilder &builder) {
if (builder.getRegion().getParentOfType<cuf::KernelOp>())
return true;
+ if (builder.getRegion().getParentOfType<mlir::acc::LoopOp>())
+ return true;
+ if (builder.getRegion().getParentOfType<mlir::acc::KernelsOp>())
+ return true;
+ if (builder.getRegion().getParentOfType<mlir::acc::ParallelOp>())
+ return true;
+ if (builder.getRegion().getParentOfType<mlir::acc::SerialOp>())
+ return true;
if (auto funcOp =
builder.getRegion().getParentOfType<mlir::func::FuncOp>()) {
if (auto cudaProcAttr =
@@ -4401,7 +4411,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
mlir::Location loc = getCurrentLocation();
fir::FirOpBuilder &builder = getFirOpBuilder();
- bool isInDeviceContext = isDeviceContext(builder);
+ bool isInDeviceContext = isCudaDeviceContext(builder);
+
bool isCUDATransfer = (Fortran::evaluate::HasCUDADeviceAttrs(assign.lhs) ||
Fortran::evaluate::HasCUDADeviceAttrs(assign.rhs)) &&
!isInDeviceContext;
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index 42b37fb89e4ce2..f189bf9b621082 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -1,4 +1,4 @@
-! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenacc -fcuda %s -o - | FileCheck %s
! Test CUDA Fortran data transfer using assignment statements.
@@ -290,3 +290,45 @@ end subroutine
! CHECK: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1>
! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %[[ARG1]](%{{.*}}) dummy_scope %{{.*}} {uniq_name = "_QFsub15Ea_host"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
! CHECK: cuf.data_transfer %[[AHOST]]#1 to %[[ADEV]]#1, %[[SHAPE]] : !fir.shape<1> {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>
+
+! Check that cuf.data_transfer ops are not generated within OpenACC regions.
+subroutine sub16()
+ integer, parameter :: n = 10
+ real, device :: adev(n)
+ real :: ahost(n)
+ real, managed :: b
+ integer :: i
+
+ adev = ahost
+ !$acc parallel loop deviceptr(adev)
+ do i = 1, n
+ adev(i) = adev(i) + b
+ enddo
+
+ !$acc kernels deviceptr(adev)
+ do i = 1, n
+ adev(i) = adev(i) + b
+ enddo
+ !$acc end kernels
+
+
+ !$acc serial deviceptr(adev)
+ do i = 1, n
+ adev(i) = adev(i) + b
+ enddo
+ !$acc end serial
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub16()
+! CHECK: cuf.data_transfer
+! CHECK: acc.parallel
+! CHECK-NOT: cuf.data_transfer
+! CHECK: hlfir.assign
+
+! CHECK: acc.kernels
+! CHECK-NOT: cuf.data_transfer
+! CHECK: hlfir.assign
+
+! CHECK: acc.serial
+! CHECK-NOT: cuf.data_transfer
+! CHECK: hlfir.assign
>From bc6df359a8a773277890d5e96960f2321acc42d3 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Thu, 29 Aug 2024 08:52:25 -0700
Subject: [PATCH 2/2] Use ComputeRegionOpInterface
---
flang/lib/Lower/Bridge.cpp | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 5e7d5fbcc2173e..078e17bea55859 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -4385,13 +4385,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
static bool isCudaDeviceContext(fir::FirOpBuilder &builder) {
if (builder.getRegion().getParentOfType<cuf::KernelOp>())
return true;
- if (builder.getRegion().getParentOfType<mlir::acc::LoopOp>())
- return true;
- if (builder.getRegion().getParentOfType<mlir::acc::KernelsOp>())
- return true;
- if (builder.getRegion().getParentOfType<mlir::acc::ParallelOp>())
- return true;
- if (builder.getRegion().getParentOfType<mlir::acc::SerialOp>())
+ if (builder.getRegion()
+ .getParentOfType<mlir::acc::ComputeRegionOpInterface>())
return true;
if (auto funcOp =
builder.getRegion().getParentOfType<mlir::func::FuncOp>()) {
More information about the flang-commits
mailing list