[flang-commits] [flang] [flang][cuda] Implicitly load cudadevice module in device/global subprogram (PR #91668)

Valentin Clement バレンタイン クレメン via flang-commits flang-commits at lists.llvm.org
Fri May 10 09:27:19 PDT 2024


https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/91668

>From ade9d8153cd01db0b0a84656272ea6fb2daf39fe Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Thu, 9 May 2024 14:53:52 -0700
Subject: [PATCH 1/4] [flang][cuda] Implicitly load cudadevice module in
 device/global function

---
 flang/include/flang/Semantics/semantics.h     |  5 +-
 flang/lib/Semantics/check-cuda.cpp            |  4 +
 flang/lib/Semantics/resolve-names.cpp         | 11 +++
 flang/lib/Semantics/semantics.cpp             |  8 ++
 flang/module/__cuda_device_builtins.f90       | 74 +++++++++++++++++++
 flang/module/cudadevice.f90                   | 21 ++++++
 .../test/Semantics/cuf-device-procedures.cuf  | 35 +++++++++
 flang/tools/f18/CMakeLists.txt                |  2 +
 8 files changed, 159 insertions(+), 1 deletion(-)
 create mode 100644 flang/module/__cuda_device_builtins.f90
 create mode 100644 flang/module/cudadevice.f90
 create mode 100644 flang/test/Semantics/cuf-device-procedures.cuf

diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h
index e6ba71d53e92b..367c9224df974 100644
--- a/flang/include/flang/Semantics/semantics.h
+++ b/flang/include/flang/Semantics/semantics.h
@@ -214,9 +214,11 @@ class SemanticsContext {
   // Defines builtinsScope_ from the __Fortran_builtins module
   void UseFortranBuiltinsModule();
   const Scope *GetBuiltinsScope() const { return builtinsScope_; }
+  
+  const Scope &GetCUDABuiltinsScope();
+  const Scope &GetCUDADeviceScope();
 
   void UsePPCBuiltinTypesModule();
-  const Scope &GetCUDABuiltinsScope();
   void UsePPCBuiltinsModule();
   Scope *GetPPCBuiltinTypesScope() { return ppcBuiltinTypesScope_; }
   const Scope *GetPPCBuiltinsScope() const { return ppcBuiltinsScope_; }
@@ -292,6 +294,7 @@ class SemanticsContext {
   const Scope *builtinsScope_{nullptr}; // module __Fortran_builtins
   Scope *ppcBuiltinTypesScope_{nullptr}; // module __Fortran_PPC_types
   std::optional<const Scope *> cudaBuiltinsScope_; // module __CUDA_builtins
+  std::optional<const Scope *> cudaDeviceScope_; // module cudadevice
   const Scope *ppcBuiltinsScope_{nullptr}; // module __ppc_intrinsics
   std::list<parser::Program> modFileParseTrees_;
   std::unique_ptr<CommonBlockMap> commonBlockMap_;
diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp
index 96ab902392633..6c32db4dbd1b3 100644
--- a/flang/lib/Semantics/check-cuda.cpp
+++ b/flang/lib/Semantics/check-cuda.cpp
@@ -82,6 +82,10 @@ struct DeviceExprChecker
           }
         }
       }
+      if (sym->owner().IsModule() &&
+          DEREF(sym->owner().symbol()).name() == "__cuda_device_builtins") {
+        return {};
+      }
     } else if (x.GetSpecificIntrinsic()) {
       // TODO(CUDA): Check for unsupported intrinsics here
       return {};
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index 61394b0f41de7..16c555ff668aa 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -3797,6 +3797,17 @@ bool SubprogramVisitor::Pre(const parser::PrefixSpec::Attributes &attrs) {
         subp->set_cudaSubprogramAttrs(attr);
       }
     }
+    if (auto attrs{subp->cudaSubprogramAttrs()}) {
+      if (*attrs == common::CUDASubprogramAttrs::Global ||
+          *attrs == common::CUDASubprogramAttrs::Device) {
+        // Implicitly USE the cudadevice module by copying its symbol in the
+        // current scope.
+        const Scope &scope{context().GetCUDADeviceScope()};
+        for (auto sym : scope.GetSymbols()) {
+          currScope().CopySymbol(sym);
+        }
+      }
+    }
   }
   return false;
 }
diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp
index 6ccd915c4dcbf..d51cc62d804e8 100644
--- a/flang/lib/Semantics/semantics.cpp
+++ b/flang/lib/Semantics/semantics.cpp
@@ -543,6 +543,14 @@ const Scope &SemanticsContext::GetCUDABuiltinsScope() {
   return **cudaBuiltinsScope_;
 }
 
+const Scope &SemanticsContext::GetCUDADeviceScope() {
+  if (!cudaDeviceScope_) {
+    cudaDeviceScope_ = GetBuiltinModule("cudadevice");
+    CHECK(cudaDeviceScope_.value() != nullptr);
+  }
+  return **cudaDeviceScope_;
+}
+
 void SemanticsContext::UsePPCBuiltinsModule() {
   if (ppcBuiltinsScope_ == nullptr) {
     ppcBuiltinsScope_ = GetBuiltinModule("__ppc_intrinsics");
diff --git a/flang/module/__cuda_device_builtins.f90 b/flang/module/__cuda_device_builtins.f90
new file mode 100644
index 0000000000000..738dc97242f2b
--- /dev/null
+++ b/flang/module/__cuda_device_builtins.f90
@@ -0,0 +1,74 @@
+!===-- module/__cuda_device_builtins.f90 -----------------------------------===!
+!
+! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+! See https://llvm.org/LICENSE.txt for license information.
+! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+!
+!===------------------------------------------------------------------------===!
+
+! CUDA Fortran procedures available in device subprograms
+
+module __CUDA_device_builtins  
+
+  implicit none
+
+  ! Set PRIVATE by default to explicitly only export what is meant
+  ! to be exported by this MODULE.
+  private
+
+  ! Synchronization Functions
+
+  interface
+    subroutine __cuda_device_builtins_syncthreads()
+    end subroutine
+  end interface
+  public :: __cuda_device_builtins_syncthreads
+
+  interface
+    integer function __cuda_device_builtins_syncthreads_and(value)
+      integer :: value
+    end function
+  end interface
+  public :: __cuda_device_builtins_syncthreads_and
+
+  interface
+    integer function __cuda_device_builtins_syncthreads_count(value)
+      integer :: value
+    end function
+  end interface
+  public :: __cuda_device_builtins_syncthreads_count
+
+  interface
+    integer function __cuda_device_builtins_syncthreads_or(int_value)
+    end function
+  end interface
+  public :: __cuda_device_builtins_syncthreads_or
+
+  interface
+    subroutine __cuda_device_builtins_syncwarp(mask)
+      integer :: mask
+    end subroutine
+  end interface
+  public :: __cuda_device_builtins_syncwarp
+
+  ! Memory Fences
+
+  interface
+    subroutine __cuda_device_builtins_threadfence()
+    end subroutine
+  end interface
+  public :: __cuda_device_builtins_threadfence
+
+  interface
+    subroutine __cuda_device_builtins_threadfence_block()
+    end subroutine
+  end interface
+  public :: __cuda_device_builtins_threadfence_block
+
+  interface
+    subroutine __cuda_device_builtins_threadfence_system()
+    end subroutine
+  end interface
+  public :: __cuda_device_builtins_threadfence_system
+
+end module
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
new file mode 100644
index 0000000000000..b635d77ea4529
--- /dev/null
+++ b/flang/module/cudadevice.f90
@@ -0,0 +1,21 @@
+!===-- module/cudadevice.f90 -----------------------------------------------===!
+!
+! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+! See https://llvm.org/LICENSE.txt for license information.
+! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+!
+!===------------------------------------------------------------------------===!
+
+! CUDA Fortran procedures available in device subprograms
+
+module cudadevice
+  use __cuda_device_builtins, only: &
+    syncthreads => __cuda_device_builtins_syncthreads, &
+    syncthreads_and => __cuda_device_builtins_syncthreads_and, &
+    syncthreads_count => __cuda_device_builtins_syncthreads_count, &
+    syncthreads_or => __cuda_device_builtins_syncthreads_or, &
+    syncwarp => __cuda_device_builtins_syncwarp, &
+    threadfence => __cuda_device_builtins_threadfence, &
+    threadfence_block => __cuda_device_builtins_threadfence_block, &
+    threadfence_system => __cuda_device_builtins_threadfence_system
+end module
diff --git a/flang/test/Semantics/cuf-device-procedures.cuf b/flang/test/Semantics/cuf-device-procedures.cuf
new file mode 100644
index 0000000000000..e79423e3587a1
--- /dev/null
+++ b/flang/test/Semantics/cuf-device-procedures.cuf
@@ -0,0 +1,35 @@
+! RUN: %flang_fc1 -fdebug-dump-symbols %s | FileCheck %s
+
+! Test that CUDA Fortran device intrinsics pass semantic checks
+
+attributes(global) subroutine devsub()
+  implicit none
+  integer :: ret
+
+  ! 3.6.4. Synchronization Functions
+  call syncthreads()
+  call syncwarp(1)
+  call threadfence()
+  call threadfence_block()
+  call threadfence_system()
+  ret = syncthreads_and(1)
+  ret = syncthreads_count(1)
+  ret = syncthreads_or(1)
+end
+
+! CHECK-LABEL: Subprogram scope: devsub
+! CHECK: syncthreads, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_syncthreads in __cuda_device_builtins
+! CHECK: syncthreads_and, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_and in __cuda_device_builtins
+! CHECK: syncthreads_count, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_count in __cuda_device_builtins
+! CHECK: syncthreads_or, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_or in __cuda_device_builtins
+! CHECK: syncwarp, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_syncwarp in __cuda_device_builtins
+! CHECK: threadfence, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence in __cuda_device_builtins
+! CHECK: threadfence_block, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence_block in __cuda_device_builtins
+! CHECK: threadfence_system, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence_system in __cuda_device_builtins
+
+subroutine host()
+  call syncthreads()
+end subroutine
+
+! CHECK-LABEL: Subprogram scope: host
+! CHECK: syncthreads, EXTERNAL: HostAssoc{{$}}
diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt
index 64815a1f5da62..e5cf945d1f118 100644
--- a/flang/tools/f18/CMakeLists.txt
+++ b/flang/tools/f18/CMakeLists.txt
@@ -12,6 +12,8 @@ set(MODULES
   "__ppc_intrinsics"
   "mma"
   "__cuda_builtins"
+  "__cuda_device_builtins"
+  "cudadevice"
   "ieee_arithmetic"
   "ieee_exceptions"
   "ieee_features"

>From 5eb7d79928441d1d9e4f9b6a30644366b9022385 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Thu, 9 May 2024 15:02:57 -0700
Subject: [PATCH 2/4] clang-format

---
 flang/include/flang/Semantics/semantics.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h
index 367c9224df974..167e613816394 100644
--- a/flang/include/flang/Semantics/semantics.h
+++ b/flang/include/flang/Semantics/semantics.h
@@ -214,7 +214,7 @@ class SemanticsContext {
   // Defines builtinsScope_ from the __Fortran_builtins module
   void UseFortranBuiltinsModule();
   const Scope *GetBuiltinsScope() const { return builtinsScope_; }
-  
+
   const Scope &GetCUDABuiltinsScope();
   const Scope &GetCUDADeviceScope();
 

>From 462a4a0cb08ad90cd84642f0a1d591bad02cb8d2 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Thu, 9 May 2024 15:45:29 -0700
Subject: [PATCH 3/4] Address comments

---
 flang/lib/Semantics/check-cuda.cpp              |  1 +
 flang/lib/Semantics/resolve-names.cpp           |  6 ++++--
 ...ocedures.cuf => cuf-device-procedures01.cuf} |  0
 .../test/Semantics/cuf-device-procedures02.cuf  | 17 +++++++++++++++++
 4 files changed, 22 insertions(+), 2 deletions(-)
 rename flang/test/Semantics/{cuf-device-procedures.cuf => cuf-device-procedures01.cuf} (100%)
 create mode 100644 flang/test/Semantics/cuf-device-procedures02.cuf

diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp
index 6c32db4dbd1b3..2830d5f0be6ea 100644
--- a/flang/lib/Semantics/check-cuda.cpp
+++ b/flang/lib/Semantics/check-cuda.cpp
@@ -83,6 +83,7 @@ struct DeviceExprChecker
         }
       }
       if (sym->owner().IsModule() &&
+          sym->owner().parent().IsIntrinsicModules() &&
           DEREF(sym->owner().symbol()).name() == "__cuda_device_builtins") {
         return {};
       }
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index 16c555ff668aa..9e32463fa54b9 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -3800,11 +3800,13 @@ bool SubprogramVisitor::Pre(const parser::PrefixSpec::Attributes &attrs) {
     if (auto attrs{subp->cudaSubprogramAttrs()}) {
       if (*attrs == common::CUDASubprogramAttrs::Global ||
           *attrs == common::CUDASubprogramAttrs::Device) {
-        // Implicitly USE the cudadevice module by copying its symbol in the
+        // Implicitly USE the cudadevice module by copying its symbols in the
         // current scope.
         const Scope &scope{context().GetCUDADeviceScope()};
         for (auto sym : scope.GetSymbols()) {
-          currScope().CopySymbol(sym);
+          if (!currScope().FindSymbol(sym->name())) {
+            currScope().CopySymbol(sym);
+          }
         }
       }
     }
diff --git a/flang/test/Semantics/cuf-device-procedures.cuf b/flang/test/Semantics/cuf-device-procedures01.cuf
similarity index 100%
rename from flang/test/Semantics/cuf-device-procedures.cuf
rename to flang/test/Semantics/cuf-device-procedures01.cuf
diff --git a/flang/test/Semantics/cuf-device-procedures02.cuf b/flang/test/Semantics/cuf-device-procedures02.cuf
new file mode 100644
index 0000000000000..ea6a094ed5c38
--- /dev/null
+++ b/flang/test/Semantics/cuf-device-procedures02.cuf
@@ -0,0 +1,17 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1
+
+module dev
+  integer, device :: syncthreads
+
+contains
+
+  attributes(device) subroutine sub1()
+    syncthreads = 1 ! syncthreads not overwritten by cudadevice
+  end subroutine
+
+  attributes(global) subroutine sub2()
+!ERROR: 'threadfence' is use-associated from module '__cuda_device_builtins' and cannot be re-declared
+    integer :: threadfence
+  end subroutine
+end module
+

>From d12c3f23510455cf419eae9f7c93c5ae916c7c58 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 10 May 2024 09:27:06 -0700
Subject: [PATCH 4/4] Add module dependency

---
 flang/tools/f18/CMakeLists.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt
index e5cf945d1f118..0222654c8e5d8 100644
--- a/flang/tools/f18/CMakeLists.txt
+++ b/flang/tools/f18/CMakeLists.txt
@@ -33,6 +33,8 @@ if (NOT CMAKE_CROSSCOMPILING)
     elseif(${filename} STREQUAL "__ppc_intrinsics" OR
            ${filename} STREQUAL "mma")
       set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod)
+    elseif(${filename} STREQUAL "cudadevice")
+      set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_device_builtins.mod)
     else()
       set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod)
       if(NOT ${filename} STREQUAL "__fortran_type_info")



More information about the flang-commits mailing list