[flang-commits] [flang] [flang][cuda] Implicitly load cudadevice module in device/global subprogram (PR #92038)

Valentin Clement バレンタイン クレメン via flang-commits flang-commits at lists.llvm.org
Fri May 17 13:33:57 PDT 2024


Valentin Clement (バレンタイン クレメン),
Valentin Clement (バレンタイン クレメン),
Valentin Clement (バレンタイン クレメン),Valentin Clement
 <clementval at gmail.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/92038 at github.com>


https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/92038

>From 0f710596541b2c446f4c7908dad349473bcad557 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Thu, 9 May 2024 14:53:52 -0700
Subject: [PATCH 1/7] [flang][cuda] Implicitly load cudadevice module in
 device/global function

---
 flang/include/flang/Semantics/semantics.h     |  5 +-
 flang/lib/Semantics/resolve-names.cpp         | 20 +++++
 flang/lib/Semantics/semantics.cpp             |  8 ++
 flang/module/cudadevice.f90                   | 74 +++++++++++++++++++
 .../Semantics/cuf-device-procedures01.cuf     | 35 +++++++++
 .../Semantics/cuf-device-procedures02.cuf     | 17 +++++
 flang/tools/f18/CMakeLists.txt                | 11 ++-
 7 files changed, 165 insertions(+), 5 deletions(-)
 create mode 100644 flang/module/cudadevice.f90
 create mode 100644 flang/test/Semantics/cuf-device-procedures01.cuf
 create mode 100644 flang/test/Semantics/cuf-device-procedures02.cuf

diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h
index e6ba71d53e92b..167e613816394 100644
--- a/flang/include/flang/Semantics/semantics.h
+++ b/flang/include/flang/Semantics/semantics.h
@@ -215,8 +215,10 @@ class SemanticsContext {
   void UseFortranBuiltinsModule();
   const Scope *GetBuiltinsScope() const { return builtinsScope_; }
 
-  void UsePPCBuiltinTypesModule();
   const Scope &GetCUDABuiltinsScope();
+  const Scope &GetCUDADeviceScope();
+
+  void UsePPCBuiltinTypesModule();
   void UsePPCBuiltinsModule();
   Scope *GetPPCBuiltinTypesScope() { return ppcBuiltinTypesScope_; }
   const Scope *GetPPCBuiltinsScope() const { return ppcBuiltinsScope_; }
@@ -292,6 +294,7 @@ class SemanticsContext {
   const Scope *builtinsScope_{nullptr}; // module __Fortran_builtins
   Scope *ppcBuiltinTypesScope_{nullptr}; // module __Fortran_PPC_types
   std::optional<const Scope *> cudaBuiltinsScope_; // module __CUDA_builtins
+  std::optional<const Scope *> cudaDeviceScope_; // module cudadevice
   const Scope *ppcBuiltinsScope_{nullptr}; // module __ppc_intrinsics
   std::list<parser::Program> modFileParseTrees_;
   std::unique_ptr<CommonBlockMap> commonBlockMap_;
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index e2875081b732c..1ade71805b70d 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -3797,6 +3797,26 @@ bool SubprogramVisitor::Pre(const parser::PrefixSpec::Attributes &attrs) {
         subp->set_cudaSubprogramAttrs(attr);
       }
     }
+    if (auto attrs{subp->cudaSubprogramAttrs()}) {
+      if (*attrs == common::CUDASubprogramAttrs::Global ||
+          *attrs == common::CUDASubprogramAttrs::Device) {
+        const Scope &scope{currScope()};
+        const Scope *mod{FindModuleContaining(scope)};
+        if (mod && mod->GetName().value() == "cudadevice") {
+          return false;
+        }
+        // Implicitly USE the cudadevice module by copying its symbols into
+        // the current scope.
+        const Scope &cudaDeviceScope{context().GetCUDADeviceScope()};
+        for (auto sym : cudaDeviceScope.GetSymbols()) {
+          if (!currScope().FindSymbol(sym->name())) {
+            auto &localSymbol{MakeSymbol(sym->name())};
+            localSymbol.set_details(UseDetails{sym->name(), *sym});
+            localSymbol.flags() = sym->flags();
+          }
+        }
+      }
+    }
   }
   return false;
 }
diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp
index 6ccd915c4dcbf..d51cc62d804e8 100644
--- a/flang/lib/Semantics/semantics.cpp
+++ b/flang/lib/Semantics/semantics.cpp
@@ -543,6 +543,14 @@ const Scope &SemanticsContext::GetCUDABuiltinsScope() {
   return **cudaBuiltinsScope_;
 }
 
+const Scope &SemanticsContext::GetCUDADeviceScope() {
+  if (!cudaDeviceScope_) {
+    cudaDeviceScope_ = GetBuiltinModule("cudadevice");
+    CHECK(cudaDeviceScope_.value() != nullptr);
+  }
+  return **cudaDeviceScope_;
+}
+
 void SemanticsContext::UsePPCBuiltinsModule() {
   if (ppcBuiltinsScope_ == nullptr) {
     ppcBuiltinsScope_ = GetBuiltinModule("__ppc_intrinsics");
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
new file mode 100644
index 0000000000000..f34820dd10792
--- /dev/null
+++ b/flang/module/cudadevice.f90
@@ -0,0 +1,74 @@
+!===-- module/cudadevice.f90 -----------------------------------------------===!
+!
+! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+! See https://llvm.org/LICENSE.txt for license information.
+! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+!
+!===------------------------------------------------------------------------===!
+
+! CUDA Fortran procedures available in device subprograms
+
+module cudadevice
+implicit none
+
+  ! Set PRIVATE by default to explicitly only export what is meant
+  ! to be exported by this MODULE.
+  private
+
+  ! Synchronization Functions
+
+  interface
+    attributes(device) subroutine syncthreads()
+    end subroutine
+  end interface
+  public :: syncthreads
+
+  interface
+    attributes(device) integer function syncthreads_and(value)
+      integer :: value
+    end function
+  end interface
+  public :: syncthreads_and
+
+  interface
+    attributes(device) integer function syncthreads_count(value)
+      integer :: value
+    end function
+  end interface
+  public :: syncthreads_count
+
+  interface
+    attributes(device) integer function syncthreads_or(value)
+      integer :: value
+    end function
+  end interface
+  public :: syncthreads_or
+
+  interface
+    attributes(device) subroutine syncwarp(mask)
+      integer :: mask
+    end subroutine
+  end interface
+  public :: syncwarp
+
+  ! Memory Fences
+
+  interface
+    attributes(device) subroutine threadfence()
+    end subroutine
+  end interface
+  public :: threadfence
+
+  interface
+    attributes(device) subroutine threadfence_block()
+    end subroutine
+  end interface
+  public :: threadfence_block
+
+  interface
+    attributes(device) subroutine threadfence_system()
+    end subroutine
+  end interface
+  public :: threadfence_system
+
+end module
diff --git a/flang/test/Semantics/cuf-device-procedures01.cuf b/flang/test/Semantics/cuf-device-procedures01.cuf
new file mode 100644
index 0000000000000..b9918d8a4ae4c
--- /dev/null
+++ b/flang/test/Semantics/cuf-device-procedures01.cuf
@@ -0,0 +1,35 @@
+! RUN: %flang_fc1 -fdebug-dump-symbols %s | FileCheck %s
+
+! Test that CUDA Fortran device intrinsics pass semantic analysis
+
+attributes(global) subroutine devsub()
+  implicit none
+  integer :: ret
+
+  ! 3.6.4. Synchronization Functions
+  call syncthreads()
+  call syncwarp(1)
+  call threadfence()
+  call threadfence_block()
+  call threadfence_system()
+  ret = syncthreads_and(1)
+  ret = syncthreads_count(1)
+  ret = syncthreads_or(1)
+end
+
+! CHECK-LABEL: Subprogram scope: devsub
+! CHECK: syncthreads (Subroutine): Use from syncthreads in cudadevice
+! CHECK: syncthreads_and (Function): Use from syncthreads_and in cudadevice
+! CHECK: syncthreads_count (Function): Use from syncthreads_count in cudadevice
+! CHECK: syncthreads_or (Function): Use from syncthreads_or in cudadevice
+! CHECK: syncwarp (Subroutine): Use from syncwarp in cudadevice
+! CHECK: threadfence (Subroutine): Use from threadfence in cudadevice
+! CHECK: threadfence_block (Subroutine): Use from threadfence_block in cudadevice
+! CHECK: threadfence_system (Subroutine): Use from threadfence_system in cudadevice
+
+subroutine host()
+  call syncthreads()
+end subroutine
+
+! CHECK-LABEL: Subprogram scope: host
+! CHECK: syncthreads, EXTERNAL: HostAssoc{{$}}
diff --git a/flang/test/Semantics/cuf-device-procedures02.cuf b/flang/test/Semantics/cuf-device-procedures02.cuf
new file mode 100644
index 0000000000000..c93fc4033b8f0
--- /dev/null
+++ b/flang/test/Semantics/cuf-device-procedures02.cuf
@@ -0,0 +1,17 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1
+
+module dev
+  integer, device :: syncthreads
+
+contains
+
+  attributes(device) subroutine sub1()
+    syncthreads = 1 ! syncthreads not overwritten by cudadevice
+  end subroutine
+
+  attributes(global) subroutine sub2()
+!ERROR: 'threadfence' is use-associated from module 'cudadevice' and cannot be re-declared
+    integer :: threadfence
+  end subroutine
+end module
+
diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt
index 64815a1f5da62..73046b9d538ea 100644
--- a/flang/tools/f18/CMakeLists.txt
+++ b/flang/tools/f18/CMakeLists.txt
@@ -12,6 +12,7 @@ set(MODULES
   "__ppc_intrinsics"
   "mma"
   "__cuda_builtins"
+  "cudadevice"
   "ieee_arithmetic"
   "ieee_exceptions"
   "ieee_features"
@@ -26,11 +27,14 @@ set(MODULES
 if (NOT CMAKE_CROSSCOMPILING)
   foreach(filename ${MODULES})
     set(depends "")
+    set(opts "")
     if(${filename} STREQUAL "__fortran_builtins" OR
        ${filename} STREQUAL "__ppc_types")
     elseif(${filename} STREQUAL "__ppc_intrinsics" OR
            ${filename} STREQUAL "mma")
       set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod)
+    elseif(${filename} STREQUAL "cudadevice")
+      set(opts "-xcuda")
     else()
       set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod)
       if(NOT ${filename} STREQUAL "__fortran_type_info")
@@ -43,9 +47,8 @@ if (NOT CMAKE_CROSSCOMPILING)
     endif()
 
     # The module contains PPC vector types that needs the PPC target.
-    set(opts "")
-      if(${filename} STREQUAL "__ppc_intrinsics" OR
-         ${filename} STREQUAL "mma")
+    if(${filename} STREQUAL "__ppc_intrinsics" OR
+       ${filename} STREQUAL "mma")
       if (PowerPC IN_LIST LLVM_TARGETS_TO_BUILD)
         set(opts "--target=ppc64le")
       else()
@@ -58,7 +61,7 @@ if (NOT CMAKE_CROSSCOMPILING)
     # TODO: We may need to flag this with conditional, in case Flang is built w/o OpenMP support
     add_custom_command(OUTPUT ${base}.mod
       COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR}
-      COMMAND flang-new -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR}
+      COMMAND flang-new -fc1 -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR}
         ${FLANG_SOURCE_DIR}/module/${filename}.f90
       DEPENDS flang-new ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${FLANG_SOURCE_DIR}/module/__fortran_builtins.f90 ${depends}
     )

>From ad477362a0fb761ff69f6daa567365bce3009ef4 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 13 May 2024 15:18:03 -0700
Subject: [PATCH 2/7] Use overloaded MakeSymbol

---
 flang/lib/Semantics/resolve-names.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index 1ade71805b70d..d2503a053e669 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -3810,8 +3810,8 @@ bool SubprogramVisitor::Pre(const parser::PrefixSpec::Attributes &attrs) {
         const Scope &cudaDeviceScope{context().GetCUDADeviceScope()};
         for (auto sym : cudaDeviceScope.GetSymbols()) {
           if (!currScope().FindSymbol(sym->name())) {
-            auto &localSymbol{MakeSymbol(sym->name())};
-            localSymbol.set_details(UseDetails{sym->name(), *sym});
+            auto &localSymbol{MakeSymbol(
+                sym->name(), Attrs{}, UseDetails{sym->name(), *sym})};
             localSymbol.flags() = sym->flags();
           }
         }

>From 9c3ab1a281ff3edae207cd4ec9c34853ca82d5a1 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 14 May 2024 10:31:22 -0700
Subject: [PATCH 3/7] Add missing dep

---
 flang/tools/f18/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt
index 73046b9d538ea..a32766d49441f 100644
--- a/flang/tools/f18/CMakeLists.txt
+++ b/flang/tools/f18/CMakeLists.txt
@@ -35,6 +35,7 @@ if (NOT CMAKE_CROSSCOMPILING)
       set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod)
     elseif(${filename} STREQUAL "cudadevice")
       set(opts "-xcuda")
+      set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_builtins.mod)
     else()
       set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod)
       if(NOT ${filename} STREQUAL "__fortran_type_info")

>From e0d19444e9efc26b36814d4b6bca950c252c9d0a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?=
 =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?=
 =?UTF-8?q?=E3=83=B3=29?= <clementval at gmail.com>
Date: Tue, 14 May 2024 14:01:50 -0700
Subject: [PATCH 4/7] Update flang/tools/f18/CMakeLists.txt

---
 flang/tools/f18/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt
index a32766d49441f..96f22c15258fc 100644
--- a/flang/tools/f18/CMakeLists.txt
+++ b/flang/tools/f18/CMakeLists.txt
@@ -62,7 +62,7 @@ if (NOT CMAKE_CROSSCOMPILING)
     # TODO: We may need to flag this with conditional, in case Flang is built w/o OpenMP support
     add_custom_command(OUTPUT ${base}.mod
       COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR}
-      COMMAND flang-new -fc1 -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR}
+      COMMAND flang-new -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR}
         ${FLANG_SOURCE_DIR}/module/${filename}.f90
       DEPENDS flang-new ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${FLANG_SOURCE_DIR}/module/__fortran_builtins.f90 ${depends}
     )

>From 47476eb16dac0c1e0cdb8744fa95123be575dff0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?=
 =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?=
 =?UTF-8?q?=E3=83=B3=29?= <clementval at gmail.com>
Date: Tue, 14 May 2024 15:16:29 -0700
Subject: [PATCH 5/7] Update flang/tools/f18/CMakeLists.txt

---
 flang/tools/f18/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt
index 96f22c15258fc..6ac30eb7ccb21 100644
--- a/flang/tools/f18/CMakeLists.txt
+++ b/flang/tools/f18/CMakeLists.txt
@@ -34,7 +34,7 @@ if (NOT CMAKE_CROSSCOMPILING)
            ${filename} STREQUAL "mma")
       set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod)
     elseif(${filename} STREQUAL "cudadevice")
-      set(opts "-xcuda")
+      set(opts "-xcuda -nocudainc")
       set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_builtins.mod)
     else()
       set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod)

>From 1650b763e6b6035ec67dd4ee9f8364c452b4659b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?=
 =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?=
 =?UTF-8?q?=E3=83=B3=29?= <clementval at gmail.com>
Date: Tue, 14 May 2024 21:16:33 -0700
Subject: [PATCH 6/7] Update flang/tools/f18/CMakeLists.txt

Try to fix flang-new syntax-only execution
---
 flang/tools/f18/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt
index 6ac30eb7ccb21..1d8e89b40ea09 100644
--- a/flang/tools/f18/CMakeLists.txt
+++ b/flang/tools/f18/CMakeLists.txt
@@ -34,7 +34,7 @@ if (NOT CMAKE_CROSSCOMPILING)
            ${filename} STREQUAL "mma")
       set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod)
     elseif(${filename} STREQUAL "cudadevice")
-      set(opts "-xcuda -nocudainc")
+      set(opts -xcuda -nocudainc)
       set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_builtins.mod)
     else()
       set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod)

>From cda4094595d24dc71b189779e3768e6624ed8700 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 17 May 2024 13:33:43 -0700
Subject: [PATCH 7/7] Use -fc1 with -x cuda

---
 flang/tools/f18/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt
index 1d8e89b40ea09..35e1cdafd3ae3 100644
--- a/flang/tools/f18/CMakeLists.txt
+++ b/flang/tools/f18/CMakeLists.txt
@@ -34,7 +34,7 @@ if (NOT CMAKE_CROSSCOMPILING)
            ${filename} STREQUAL "mma")
       set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod)
     elseif(${filename} STREQUAL "cudadevice")
-      set(opts -xcuda -nocudainc)
+      set(opts -fc1 -xcuda)
       set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_builtins.mod)
     else()
       set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod)
@@ -62,7 +62,7 @@ if (NOT CMAKE_CROSSCOMPILING)
     # TODO: We may need to flag this with conditional, in case Flang is built w/o OpenMP support
     add_custom_command(OUTPUT ${base}.mod
       COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR}
-      COMMAND flang-new -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR}
+      COMMAND flang-new ${opts} -cpp -fsyntax-only -module-dir ${FLANG_INTRINSIC_MODULES_DIR}
         ${FLANG_SOURCE_DIR}/module/${filename}.f90
       DEPENDS flang-new ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${FLANG_SOURCE_DIR}/module/__fortran_builtins.f90 ${depends}
     )



More information about the flang-commits mailing list