[flang-commits] [flang] [flang][cuda] Avoid to issue data transfer in device context (PR #90247)

Valentin Clement バレンタイン クレメン via flang-commits flang-commits at lists.llvm.org
Fri Apr 26 13:02:14 PDT 2024


https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/90247

>From c82e690702c04d337eee249ba9c088f71e0bcb03 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 26 Apr 2024 11:29:52 -0700
Subject: [PATCH 1/2] [flang][cuda] Avoid to issue data transfer in device
 context

Data transfers should not be issued inside device functions.
---
 flang/lib/Lower/Bridge.cpp                   | 24 ++++++++++++++++----
 flang/test/Lower/CUDA/cuda-data-transfer.cuf | 10 ++++++++
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 19c00884bd1b7e..e3679ef2afbbc4 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -3807,16 +3807,30 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     return temps;
   }
 
+  static bool isDeviceContext(fir::FirOpBuilder &builder) {
+    if (builder.getRegion().getParentOfType<fir::CUDAKernelOp>())
+      return true;
+    if (auto funcOp =
+            builder.getRegion().getParentOfType<mlir::func::FuncOp>()) {
+      if (auto cudaProcAttr =
+              funcOp.getOperation()->getAttrOfType<fir::CUDAProcAttributeAttr>(
+                  fir::getCUDAAttrName())) {
+        return cudaProcAttr.getValue() != fir::CUDAProcAttribute::Host;
+      }
+    }
+    return false;
+  }
+
   void genDataAssignment(
       const Fortran::evaluate::Assignment &assign,
       const Fortran::evaluate::ProcedureRef *userDefinedAssignment) {
     mlir::Location loc = getCurrentLocation();
     fir::FirOpBuilder &builder = getFirOpBuilder();
 
-    bool isInDeviceContext =
-        builder.getRegion().getParentOfType<fir::CUDAKernelOp>();
-    bool isCUDATransfer = Fortran::evaluate::HasCUDAAttrs(assign.lhs) ||
-                          Fortran::evaluate::HasCUDAAttrs(assign.rhs);
+    bool isInDeviceContext = isDeviceContext(builder);
+    bool isCUDATransfer = (Fortran::evaluate::HasCUDAAttrs(assign.lhs) ||
+                           Fortran::evaluate::HasCUDAAttrs(assign.rhs)) &&
+                          !isInDeviceContext;
     bool hasCUDAImplicitTransfer =
         Fortran::evaluate::HasCUDAImplicitTransfer(assign.rhs);
     llvm::SmallVector<mlir::Value> implicitTemps;
@@ -3879,7 +3893,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
       Fortran::lower::StatementContext localStmtCtx;
       hlfir::Entity rhs = evaluateRhs(localStmtCtx);
       hlfir::Entity lhs = evaluateLhs(localStmtCtx);
-      if (isCUDATransfer && !hasCUDAImplicitTransfer && !isInDeviceContext)
+      if (isCUDATransfer && !hasCUDAImplicitTransfer)
         genCUDADataTransfer(builder, loc, assign, lhs, rhs);
       else
         builder.create<hlfir::AssignOp>(loc, rhs, lhs,
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index 70483685d20019..add1052e576c15 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -141,3 +141,13 @@ end subroutine
 ! CHECK: fir.cuda_kernel<<<*, *>>>
 ! CHECK-NOT: fir.cuda_data_transfer
 ! CHECK: hlfir.assign
+
+attributes(global) subroutine sub5(a)
+  integer, device :: a
+  integer :: i
+  i = threadIdx%x + (blockIdx%x - 1) * blockDim%x
+  a = i
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub5
+! CHECK-NOT: fir.cuda_data_transfer

>From 788b9f9d744dc20b750afc13826303e5ebbbeb1f Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 26 Apr 2024 13:02:02 -0700
Subject: [PATCH 2/2] Treat host,device as host context

---
 flang/lib/Lower/Bridge.cpp                   |  6 +++++-
 flang/test/Lower/CUDA/cuda-data-transfer.cuf | 10 +++++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index e3679ef2afbbc4..eb1660a706a88d 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -3807,6 +3807,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     return temps;
   }
 
+  // Check if the insertion point is currently in a device context. HostDevice
+  // subprograms are not considered a fully device context, so this returns
+  // false for them.
   static bool isDeviceContext(fir::FirOpBuilder &builder) {
     if (builder.getRegion().getParentOfType<fir::CUDAKernelOp>())
       return true;
@@ -3815,7 +3818,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
       if (auto cudaProcAttr =
               funcOp.getOperation()->getAttrOfType<fir::CUDAProcAttributeAttr>(
                   fir::getCUDAAttrName())) {
-        return cudaProcAttr.getValue() != fir::CUDAProcAttribute::Host;
+        return cudaProcAttr.getValue() != fir::CUDAProcAttribute::Host
+          && cudaProcAttr.getValue() != fir::CUDAProcAttribute::HostDevice;
       }
     }
     return false;
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index add1052e576c15..0a2608639bce7c 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -145,9 +145,17 @@ end subroutine
 attributes(global) subroutine sub5(a)
   integer, device :: a
   integer :: i
-  i = threadIdx%x + (blockIdx%x - 1) * blockDim%x
   a = i
 end subroutine
 
 ! CHECK-LABEL: func.func @_QPsub5
 ! CHECK-NOT: fir.cuda_data_transfer
+
+attributes(host,device) subroutine sub6(a)
+  integer, device :: a
+  integer :: i
+  a = i
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub6
+! CHECK: fir.cuda_data_transfer



More information about the flang-commits mailing list