[flang-commits] [flang] [flang][cuda] Do not consider PINNED as device attribute (PR #95988)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Tue Jun 18 15:06:27 PDT 2024
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/95988
From 4e7a0aa8fcee8d73dc77b04f4b32e512bbb71267 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 18 Jun 2024 14:01:02 -0700
Subject: [PATCH 1/3] [flang][cuda] Do not consider PINNED as device attribute
PINNED is a CUDA data attribute meant for host variables. Do not
count it when computing the number of device variables in an assignment
for CUDA data transfer.
---
flang/include/flang/Evaluate/tools.h | 15 +++++++++------
flang/lib/Lower/Bridge.cpp | 8 ++++----
flang/lib/Semantics/check-cuda.cpp | 4 ++--
flang/test/Lower/CUDA/cuda-data-transfer.cuf | 11 ++++++++++-
4 files changed, 25 insertions(+), 13 deletions(-)
diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h
index 9c3dfb7a6f6ab..340325b59c0ab 100644
--- a/flang/include/flang/Evaluate/tools.h
+++ b/flang/include/flang/Evaluate/tools.h
@@ -1231,12 +1231,13 @@ bool CheckForCoindexedObject(parser::ContextualMessages &,
const std::string &argName);
// Get the number of distinct symbols with CUDA attribute in the expression.
-template <typename A> inline int GetNbOfCUDASymbols(const A &expr) {
+template <typename A> inline int GetNbOfCUDADeviceSymbols(const A &expr) {
semantics::UnorderedSymbolSet symbols;
for (const Symbol &sym : CollectSymbols(expr)) {
if (const auto *details =
sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()) {
- if (details->cudaDataAttr()) {
+ if (details->cudaDataAttr() &&
+ *details->cudaDataAttr() != common::CUDADataAttr::Pinned) {
symbols.insert(sym);
}
}
@@ -1246,8 +1247,8 @@ template <typename A> inline int GetNbOfCUDASymbols(const A &expr) {
// Check if any of the symbols part of the expression has a CUDA data
// attribute.
-template <typename A> inline bool HasCUDAAttrs(const A &expr) {
- return GetNbOfCUDASymbols(expr) > 0;
+template <typename A> inline bool HasCUDADeviceAttrs(const A &expr) {
+ return GetNbOfCUDADeviceSymbols(expr) > 0;
}
/// Check if the expression is a mix of host and device variables that require
@@ -1258,7 +1259,8 @@ inline bool HasCUDAImplicitTransfer(const Expr<SomeType> &expr) {
for (const Symbol &sym : CollectSymbols(expr)) {
if (const auto *details =
sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()) {
- if (details->cudaDataAttr()) {
+ if (details->cudaDataAttr() &&
+ *details->cudaDataAttr() != common::CUDADataAttr::Pinned) {
++deviceSymbols;
} else {
if (sym.owner().IsDerivedType()) {
@@ -1267,7 +1269,8 @@ inline bool HasCUDAImplicitTransfer(const Expr<SomeType> &expr) {
.GetSymbol()
->GetUltimate()
.detailsIf<semantics::ObjectEntityDetails>()) {
- if (details->cudaDataAttr()) {
+ if (details->cudaDataAttr() &&
+ *details->cudaDataAttr() != common::CUDADataAttr::Pinned) {
++deviceSymbols;
}
}
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index a3088b55a3f78..e379732efa042 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -4107,8 +4107,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
void genCUDADataTransfer(fir::FirOpBuilder &builder, mlir::Location loc,
const Fortran::evaluate::Assignment &assign,
hlfir::Entity &lhs, hlfir::Entity &rhs) {
- bool lhsIsDevice = Fortran::evaluate::HasCUDAAttrs(assign.lhs);
- bool rhsIsDevice = Fortran::evaluate::HasCUDAAttrs(assign.rhs);
+ bool lhsIsDevice = Fortran::evaluate::HasCUDADeviceAttrs(assign.lhs);
+ bool rhsIsDevice = Fortran::evaluate::HasCUDADeviceAttrs(assign.rhs);
auto getRefIfLoaded = [](mlir::Value val) -> mlir::Value {
if (auto loadOp =
@@ -4229,8 +4229,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
fir::FirOpBuilder &builder = getFirOpBuilder();
bool isInDeviceContext = isDeviceContext(builder);
- bool isCUDATransfer = (Fortran::evaluate::HasCUDAAttrs(assign.lhs) ||
- Fortran::evaluate::HasCUDAAttrs(assign.rhs)) &&
+ bool isCUDATransfer = (Fortran::evaluate::HasCUDADeviceAttrs(assign.lhs) ||
+ Fortran::evaluate::HasCUDADeviceAttrs(assign.rhs)) &&
!isInDeviceContext;
bool hasCUDAImplicitTransfer =
Fortran::evaluate::HasCUDAImplicitTransfer(assign.rhs);
diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp
index 8af50cac8ef56..5b3ea214d63e9 100644
--- a/flang/lib/Semantics/check-cuda.cpp
+++ b/flang/lib/Semantics/check-cuda.cpp
@@ -548,8 +548,8 @@ void CUDAChecker::Enter(const parser::AssignmentStmt &x) {
return;
}
- int nbLhs{evaluate::GetNbOfCUDASymbols(assign->lhs)};
- int nbRhs{evaluate::GetNbOfCUDASymbols(assign->rhs)};
+ int nbLhs{evaluate::GetNbOfCUDADeviceSymbols(assign->lhs)};
+ int nbRhs{evaluate::GetNbOfCUDADeviceSymbols(assign->rhs)};
// device to host transfer with more than one device object on the rhs is not
// legal.
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index 3b407b9c35faf..ccba7bd4d878a 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -180,7 +180,6 @@ end subroutine
! CHECK: cuf.data_transfer %[[B]]#0 to %[[A]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: cuf.data_transfer %[[A]]#0 to %[[C]]#0 {transfer_kind = #cuf.cuda_transfer<device_device>} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-
subroutine sub8(a, b, n)
integer :: n
integer, device :: a(n)
@@ -195,3 +194,13 @@ end subroutine
! CHECK: %[[A:.*]]:2 = hlfir.declare %[[ARG0]](%{{.*}}) dummy_scope %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub8Ea"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
! CHECK: cuf.data_transfer %[[A]]#0 to %[[B]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<10xi32>>
! CHECK: cuf.data_transfer %[[B]]#0 to %[[A]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.box<!fir.array<?xi32>>
+
+subroutine sub9(a)
+ integer, allocatable :: a(:)
+ do concurrent (i = 1 : 10)
+ a(i) = 10 + a(i)
+ end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub9
+! CHECK-NOT: cuf.data_transfer
From e23e0074aebea262f23f58614eae4a44ae7e9821 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 18 Jun 2024 14:07:53 -0700
Subject: [PATCH 2/3] Add missing pinned
---
flang/test/Lower/CUDA/cuda-data-transfer.cuf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index ccba7bd4d878a..065d21978e405 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -196,7 +196,7 @@ end subroutine
! CHECK: cuf.data_transfer %[[B]]#0 to %[[A]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.box<!fir.array<?xi32>>
subroutine sub9(a)
- integer, allocatable :: a(:)
+ integer, pinned, allocatable :: a(:)
do concurrent (i = 1 : 10)
a(i) = 10 + a(i)
end do
From 522fcc8ca39d59e2949ee1eb95a3568c05dee2a9 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 18 Jun 2024 14:44:30 -0700
Subject: [PATCH 3/3] Avoid implicit data_transfer for PINNED variable
---
flang/lib/Lower/Bridge.cpp | 3 ++-
flang/test/Lower/CUDA/cuda-data-transfer.cuf | 18 ++++++++++++++++++
2 files changed, 20 insertions(+), 1 deletion(-)
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index e379732efa042..404d1f6d39446 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -4177,7 +4177,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
if (const auto *details =
sym.GetUltimate()
.detailsIf<Fortran::semantics::ObjectEntityDetails>()) {
- if (details->cudaDataAttr()) {
+ if (details->cudaDataAttr() &&
+ *details->cudaDataAttr() != Fortran::common::CUDADataAttr::Pinned) {
if (sym.owner().IsDerivedType() && IsAllocatable(sym.GetUltimate()))
TODO(loc, "Device resident allocatable derived-type component");
// TODO: This should probably being checked in semantic and give a
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index 065d21978e405..5dbf39c58c449 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -204,3 +204,21 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub9
! CHECK-NOT: cuf.data_transfer
+
+subroutine sub10(a, b)
+ integer, device :: a
+ integer, allocatable, pinned :: b
+ integer :: res
+
+ res = a + b
+end subroutine
+
+
+
+! CHECK-LABEL: func.func @_QPsub10(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<i32> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "a"}
+
+! CHECK: %[[A:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %1 {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub10Ea"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: cuf.data_transfer %[[A]]#1 to %{{.*}}#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<i32>, !fir.ref<i32>
+! CHECK-NOT: cuf.data_transfer
+
More information about the flang-commits
mailing list