[flang-commits] [flang] [flang][cuda] Do not consider PINNED as device attribute (PR #95988)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Tue Jun 18 14:08:06 PDT 2024
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/95988
>From 4e7a0aa8fcee8d73dc77b04f4b32e512bbb71267 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 18 Jun 2024 14:01:02 -0700
Subject: [PATCH 1/2] [flang][cuda] Do not consider PINNED as device attribute
PINNED is a CUDA data attribute meant for host variables. Do not
consider it when computing the number of device variables in an
assignment for the CUDA data transfer.
---
flang/include/flang/Evaluate/tools.h | 15 +++++++++------
flang/lib/Lower/Bridge.cpp | 8 ++++----
flang/lib/Semantics/check-cuda.cpp | 4 ++--
flang/test/Lower/CUDA/cuda-data-transfer.cuf | 11 ++++++++++-
4 files changed, 25 insertions(+), 13 deletions(-)
diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h
index 9c3dfb7a6f6ab..340325b59c0ab 100644
--- a/flang/include/flang/Evaluate/tools.h
+++ b/flang/include/flang/Evaluate/tools.h
@@ -1231,12 +1231,13 @@ bool CheckForCoindexedObject(parser::ContextualMessages &,
const std::string &argName);
// Get the number of distinct symbols with CUDA attribute in the expression.
-template <typename A> inline int GetNbOfCUDASymbols(const A &expr) {
+template <typename A> inline int GetNbOfCUDADeviceSymbols(const A &expr) {
semantics::UnorderedSymbolSet symbols;
for (const Symbol &sym : CollectSymbols(expr)) {
if (const auto *details =
sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()) {
- if (details->cudaDataAttr()) {
+ if (details->cudaDataAttr() &&
+ *details->cudaDataAttr() != common::CUDADataAttr::Pinned) {
symbols.insert(sym);
}
}
@@ -1246,8 +1247,8 @@ template <typename A> inline int GetNbOfCUDASymbols(const A &expr) {
// Check if any of the symbols part of the expression has a CUDA data
// attribute.
-template <typename A> inline bool HasCUDAAttrs(const A &expr) {
- return GetNbOfCUDASymbols(expr) > 0;
+template <typename A> inline bool HasCUDADeviceAttrs(const A &expr) {
+ return GetNbOfCUDADeviceSymbols(expr) > 0;
}
/// Check if the expression is a mix of host and device variables that require
@@ -1258,7 +1259,8 @@ inline bool HasCUDAImplicitTransfer(const Expr<SomeType> &expr) {
for (const Symbol &sym : CollectSymbols(expr)) {
if (const auto *details =
sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()) {
- if (details->cudaDataAttr()) {
+ if (details->cudaDataAttr() &&
+ *details->cudaDataAttr() != common::CUDADataAttr::Pinned) {
++deviceSymbols;
} else {
if (sym.owner().IsDerivedType()) {
@@ -1267,7 +1269,8 @@ inline bool HasCUDAImplicitTransfer(const Expr<SomeType> &expr) {
.GetSymbol()
->GetUltimate()
.detailsIf<semantics::ObjectEntityDetails>()) {
- if (details->cudaDataAttr()) {
+ if (details->cudaDataAttr() &&
+ *details->cudaDataAttr() != common::CUDADataAttr::Pinned) {
++deviceSymbols;
}
}
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index a3088b55a3f78..e379732efa042 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -4107,8 +4107,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
void genCUDADataTransfer(fir::FirOpBuilder &builder, mlir::Location loc,
const Fortran::evaluate::Assignment &assign,
hlfir::Entity &lhs, hlfir::Entity &rhs) {
- bool lhsIsDevice = Fortran::evaluate::HasCUDAAttrs(assign.lhs);
- bool rhsIsDevice = Fortran::evaluate::HasCUDAAttrs(assign.rhs);
+ bool lhsIsDevice = Fortran::evaluate::HasCUDADeviceAttrs(assign.lhs);
+ bool rhsIsDevice = Fortran::evaluate::HasCUDADeviceAttrs(assign.rhs);
auto getRefIfLoaded = [](mlir::Value val) -> mlir::Value {
if (auto loadOp =
@@ -4229,8 +4229,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
fir::FirOpBuilder &builder = getFirOpBuilder();
bool isInDeviceContext = isDeviceContext(builder);
- bool isCUDATransfer = (Fortran::evaluate::HasCUDAAttrs(assign.lhs) ||
- Fortran::evaluate::HasCUDAAttrs(assign.rhs)) &&
+ bool isCUDATransfer = (Fortran::evaluate::HasCUDADeviceAttrs(assign.lhs) ||
+ Fortran::evaluate::HasCUDADeviceAttrs(assign.rhs)) &&
!isInDeviceContext;
bool hasCUDAImplicitTransfer =
Fortran::evaluate::HasCUDAImplicitTransfer(assign.rhs);
diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp
index 8af50cac8ef56..5b3ea214d63e9 100644
--- a/flang/lib/Semantics/check-cuda.cpp
+++ b/flang/lib/Semantics/check-cuda.cpp
@@ -548,8 +548,8 @@ void CUDAChecker::Enter(const parser::AssignmentStmt &x) {
return;
}
- int nbLhs{evaluate::GetNbOfCUDASymbols(assign->lhs)};
- int nbRhs{evaluate::GetNbOfCUDASymbols(assign->rhs)};
+ int nbLhs{evaluate::GetNbOfCUDADeviceSymbols(assign->lhs)};
+ int nbRhs{evaluate::GetNbOfCUDADeviceSymbols(assign->rhs)};
// device to host transfer with more than one device object on the rhs is not
// legal.
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index 3b407b9c35faf..ccba7bd4d878a 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -180,7 +180,6 @@ end subroutine
! CHECK: cuf.data_transfer %[[B]]#0 to %[[A]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: cuf.data_transfer %[[A]]#0 to %[[C]]#0 {transfer_kind = #cuf.cuda_transfer<device_device>} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-
subroutine sub8(a, b, n)
integer :: n
integer, device :: a(n)
@@ -195,3 +194,13 @@ end subroutine
! CHECK: %[[A:.*]]:2 = hlfir.declare %[[ARG0]](%{{.*}}) dummy_scope %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub8Ea"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
! CHECK: cuf.data_transfer %[[A]]#0 to %[[B]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<10xi32>>
! CHECK: cuf.data_transfer %[[B]]#0 to %[[A]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.box<!fir.array<?xi32>>
+
+subroutine sub9(a)
+ integer, allocatable :: a(:)
+ do concurrent (i = 1 : 10)
+ a(i) = 10 + a(i)
+ end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub9
+! CHECK-NOT: cuf.data_transfer
>From e23e0074aebea262f23f58614eae4a44ae7e9821 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 18 Jun 2024 14:07:53 -0700
Subject: [PATCH 2/2] Add missing pinned
---
flang/test/Lower/CUDA/cuda-data-transfer.cuf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index ccba7bd4d878a..065d21978e405 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -196,7 +196,7 @@ end subroutine
! CHECK: cuf.data_transfer %[[B]]#0 to %[[A]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.box<!fir.array<?xi32>>
subroutine sub9(a)
- integer, allocatable :: a(:)
+ integer, pinned, allocatable :: a(:)
do concurrent (i = 1 : 10)
a(i) = 10 + a(i)
end do
More information about the flang-commits
mailing list