[flang-commits] [flang] 5cfd5d1 - [flang][cuda] Do not generate data transfer within cuf kernel (#89973)
via flang-commits
flang-commits at lists.llvm.org
Thu Apr 25 08:50:38 PDT 2024
Author: Valentin Clement (バレンタイン クレメン)
Date: 2024-04-25T08:50:34-07:00
New Revision: 5cfd5d157cb5aca7ee348bd3e8b4fcd35a5762c8
URL: https://github.com/llvm/llvm-project/commit/5cfd5d157cb5aca7ee348bd3e8b4fcd35a5762c8
DIFF: https://github.com/llvm/llvm-project/commit/5cfd5d157cb5aca7ee348bd3e8b4fcd35a5762c8.diff
LOG: [flang][cuda] Do not generate data transfer within cuf kernel (#89973)
CUDA data transfer with intrinsic assignment are not meant to be
generated in cuf kernel. This patch fix this issue.
@ImanHosseini
Added:
Modified:
flang/lib/Lower/Bridge.cpp
flang/test/Lower/CUDA/cuda-data-transfer.cuf
Removed:
################################################################################
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 8b62fe8c022f80..ecad5eb665e01a 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -3810,12 +3810,14 @@ class FirConverter : public Fortran::lower::AbstractConverter {
mlir::Location loc = getCurrentLocation();
fir::FirOpBuilder &builder = getFirOpBuilder();
+ bool isInDeviceContext =
+ builder.getRegion().getParentOfType<fir::CUDAKernelOp>();
bool isCUDATransfer = Fortran::evaluate::HasCUDAAttrs(assign.lhs) ||
Fortran::evaluate::HasCUDAAttrs(assign.rhs);
bool hasCUDAImplicitTransfer =
Fortran::evaluate::HasCUDAImplicitTransfer(assign.rhs);
llvm::SmallVector<mlir::Value> implicitTemps;
- if (hasCUDAImplicitTransfer)
+ if (hasCUDAImplicitTransfer && !isInDeviceContext)
implicitTemps = genCUDAImplicitDataTransfer(builder, loc, assign);
// Gather some information about the assignment that will impact how it is
@@ -3874,13 +3876,13 @@ class FirConverter : public Fortran::lower::AbstractConverter {
Fortran::lower::StatementContext localStmtCtx;
hlfir::Entity rhs = evaluateRhs(localStmtCtx);
hlfir::Entity lhs = evaluateLhs(localStmtCtx);
- if (isCUDATransfer && !hasCUDAImplicitTransfer)
+ if (isCUDATransfer && !hasCUDAImplicitTransfer && !isInDeviceContext)
genCUDADataTransfer(builder, loc, assign, lhs, rhs);
else
builder.create<hlfir::AssignOp>(loc, rhs, lhs,
isWholeAllocatableAssignment,
keepLhsLengthInAllocatableAssignment);
- if (hasCUDAImplicitTransfer) {
+ if (hasCUDAImplicitTransfer && !isInDeviceContext) {
localSymbols.popScope();
for (mlir::Value temp : implicitTemps)
builder.create<fir::FreeMemOp>(loc, temp);
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index 4ebd736315bcbc..025d8147e5392d 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -119,3 +119,25 @@ end
! CHECK: %[[T:.*]]:2 = hlfir.declare %7 {cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub3Et"} : (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>) -> (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>)
! CHECK: %[[TMP_DECL:.*]]:2 = hlfir.declare %0 {uniq_name = ".tmp"} : (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>) -> (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>)
! CHECK: fir.cuda_data_transfer %[[T]]#1 to %[[TMP_DECL]]#0 {transfer_kind = #fir.cuda_transfer<device_host>} : !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>
+
+
+! Check that fir.cuda_data_transfer are not generated within cuf kernel
+subroutine sub4()
+ integer, parameter :: n = 10
+ real, device :: adev(n)
+ real :: ahost(n)
+ real :: b
+ integer :: i
+
+ adev = ahost
+ !$cuf kernel do <<<*,*>>>
+ do i = 1, n
+ adev(i) = adev(i) + b
+ enddo
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub4()
+! CHECK: fir.cuda_data_transfer
+! CHECK: fir.cuda_kernel<<<*, *>>>
+! CHECK-NOT: fir.cuda_data_transfer
+! CHECK: hlfir.assign
More information about the flang-commits
mailing list