[flang-commits] [flang] 5cfd5d1 - [flang][cuda] Do not generate data transfer within cuf kernel (#89973)

via flang-commits flang-commits at lists.llvm.org
Thu Apr 25 08:50:38 PDT 2024


Author: Valentin Clement (バレンタイン クレメン)
Date: 2024-04-25T08:50:34-07:00
New Revision: 5cfd5d157cb5aca7ee348bd3e8b4fcd35a5762c8

URL: https://github.com/llvm/llvm-project/commit/5cfd5d157cb5aca7ee348bd3e8b4fcd35a5762c8
DIFF: https://github.com/llvm/llvm-project/commit/5cfd5d157cb5aca7ee348bd3e8b4fcd35a5762c8.diff

LOG: [flang][cuda] Do not generate data transfer within cuf kernel (#89973)

CUDA data transfer with intrinsic assignment are not meant to be
generated in cuf kernel. This patch fix this issue.

@ImanHosseini

Added: 
    

Modified: 
    flang/lib/Lower/Bridge.cpp
    flang/test/Lower/CUDA/cuda-data-transfer.cuf

Removed: 
    


################################################################################
diff  --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 8b62fe8c022f80..ecad5eb665e01a 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -3810,12 +3810,14 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     mlir::Location loc = getCurrentLocation();
     fir::FirOpBuilder &builder = getFirOpBuilder();
 
+    bool isInDeviceContext =
+        builder.getRegion().getParentOfType<fir::CUDAKernelOp>();
     bool isCUDATransfer = Fortran::evaluate::HasCUDAAttrs(assign.lhs) ||
                           Fortran::evaluate::HasCUDAAttrs(assign.rhs);
     bool hasCUDAImplicitTransfer =
         Fortran::evaluate::HasCUDAImplicitTransfer(assign.rhs);
     llvm::SmallVector<mlir::Value> implicitTemps;
-    if (hasCUDAImplicitTransfer)
+    if (hasCUDAImplicitTransfer && !isInDeviceContext)
       implicitTemps = genCUDAImplicitDataTransfer(builder, loc, assign);
 
     // Gather some information about the assignment that will impact how it is
@@ -3874,13 +3876,13 @@ class FirConverter : public Fortran::lower::AbstractConverter {
       Fortran::lower::StatementContext localStmtCtx;
       hlfir::Entity rhs = evaluateRhs(localStmtCtx);
       hlfir::Entity lhs = evaluateLhs(localStmtCtx);
-      if (isCUDATransfer && !hasCUDAImplicitTransfer)
+      if (isCUDATransfer && !hasCUDAImplicitTransfer && !isInDeviceContext)
         genCUDADataTransfer(builder, loc, assign, lhs, rhs);
       else
         builder.create<hlfir::AssignOp>(loc, rhs, lhs,
                                         isWholeAllocatableAssignment,
                                         keepLhsLengthInAllocatableAssignment);
-      if (hasCUDAImplicitTransfer) {
+      if (hasCUDAImplicitTransfer && !isInDeviceContext) {
         localSymbols.popScope();
         for (mlir::Value temp : implicitTemps)
           builder.create<fir::FreeMemOp>(loc, temp);

diff  --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index 4ebd736315bcbc..025d8147e5392d 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -119,3 +119,25 @@ end
 ! CHECK: %[[T:.*]]:2 = hlfir.declare %7 {cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub3Et"} : (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>) -> (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>)
 ! CHECK: %[[TMP_DECL:.*]]:2 = hlfir.declare %0 {uniq_name = ".tmp"} : (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>) -> (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>)
 ! CHECK: fir.cuda_data_transfer %[[T]]#1 to %[[TMP_DECL]]#0 {transfer_kind = #fir.cuda_transfer<device_host>} : !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>
+
+
+! Check that fir.cuda_data_transfer are not generated within cuf kernel
+subroutine sub4()
+  integer, parameter :: n = 10
+  real, device :: adev(n)
+  real :: ahost(n)
+  real :: b
+  integer :: i
+
+  adev = ahost
+  !$cuf kernel do <<<*,*>>>
+  do i = 1, n
+    adev(i) = adev(i) + b
+  enddo
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub4()
+! CHECK: fir.cuda_data_transfer
+! CHECK: fir.cuda_kernel<<<*, *>>>
+! CHECK-NOT: fir.cuda_data_transfer
+! CHECK: hlfir.assign


        


More information about the flang-commits mailing list