[flang-commits] [flang] [flang][cuda] Implicitly load cudadevice module in device/global subprogram (PR #91668)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Thu May 9 14:57:42 PDT 2024
https://github.com/clementval created https://github.com/llvm/llvm-project/pull/91668
Some functions and subroutines are available in device context (device/global). These functions have interfaces declared in the `cudadevice` module.
This patch adds these interfaces as `__cuda_device_builtins_<fctname>` in a builtin module; the `cudadevice` module then USEs them and renames them to their public names. The `cudadevice` module is implicitly used in device/global subprograms.
The builtin module only contains procedures from section 3.6.4 for now.
From ade9d8153cd01db0b0a84656272ea6fb2daf39fe Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Thu, 9 May 2024 14:53:52 -0700
Subject: [PATCH] [flang][cuda] Implicitly load cudadevice module in
device/global function
---
flang/include/flang/Semantics/semantics.h | 5 +-
flang/lib/Semantics/check-cuda.cpp | 4 +
flang/lib/Semantics/resolve-names.cpp | 11 +++
flang/lib/Semantics/semantics.cpp | 8 ++
flang/module/__cuda_device_builtins.f90 | 74 +++++++++++++++++++
flang/module/cudadevice.f90 | 21 ++++++
.../test/Semantics/cuf-device-procedures.cuf | 35 +++++++++
flang/tools/f18/CMakeLists.txt | 2 +
8 files changed, 159 insertions(+), 1 deletion(-)
create mode 100644 flang/module/__cuda_device_builtins.f90
create mode 100644 flang/module/cudadevice.f90
create mode 100644 flang/test/Semantics/cuf-device-procedures.cuf
diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h
index e6ba71d53e92b..367c9224df974 100644
--- a/flang/include/flang/Semantics/semantics.h
+++ b/flang/include/flang/Semantics/semantics.h
@@ -214,9 +214,11 @@ class SemanticsContext {
// Defines builtinsScope_ from the __Fortran_builtins module
void UseFortranBuiltinsModule();
const Scope *GetBuiltinsScope() const { return builtinsScope_; }
+
+ const Scope &GetCUDABuiltinsScope();
+ const Scope &GetCUDADeviceScope();
void UsePPCBuiltinTypesModule();
- const Scope &GetCUDABuiltinsScope();
void UsePPCBuiltinsModule();
Scope *GetPPCBuiltinTypesScope() { return ppcBuiltinTypesScope_; }
const Scope *GetPPCBuiltinsScope() const { return ppcBuiltinsScope_; }
@@ -292,6 +294,7 @@ class SemanticsContext {
const Scope *builtinsScope_{nullptr}; // module __Fortran_builtins
Scope *ppcBuiltinTypesScope_{nullptr}; // module __Fortran_PPC_types
std::optional<const Scope *> cudaBuiltinsScope_; // module __CUDA_builtins
+ std::optional<const Scope *> cudaDeviceScope_; // module cudadevice
const Scope *ppcBuiltinsScope_{nullptr}; // module __ppc_intrinsics
std::list<parser::Program> modFileParseTrees_;
std::unique_ptr<CommonBlockMap> commonBlockMap_;
diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp
index 96ab902392633..6c32db4dbd1b3 100644
--- a/flang/lib/Semantics/check-cuda.cpp
+++ b/flang/lib/Semantics/check-cuda.cpp
@@ -82,6 +82,10 @@ struct DeviceExprChecker
}
}
}
+ if (sym->owner().IsModule() &&
+ DEREF(sym->owner().symbol()).name() == "__cuda_device_builtins") {
+ return {};
+ }
} else if (x.GetSpecificIntrinsic()) {
// TODO(CUDA): Check for unsupported intrinsics here
return {};
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index 61394b0f41de7..16c555ff668aa 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -3797,6 +3797,17 @@ bool SubprogramVisitor::Pre(const parser::PrefixSpec::Attributes &attrs) {
subp->set_cudaSubprogramAttrs(attr);
}
}
+ if (auto attrs{subp->cudaSubprogramAttrs()}) {
+ if (*attrs == common::CUDASubprogramAttrs::Global ||
+ *attrs == common::CUDASubprogramAttrs::Device) {
+ // Implicitly USE the cudadevice module by copying its symbols into
+ // the current scope.
+ const Scope &scope{context().GetCUDADeviceScope()};
+ for (auto sym : scope.GetSymbols()) {
+ currScope().CopySymbol(sym);
+ }
+ }
+ }
}
return false;
}
diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp
index 6ccd915c4dcbf..d51cc62d804e8 100644
--- a/flang/lib/Semantics/semantics.cpp
+++ b/flang/lib/Semantics/semantics.cpp
@@ -543,6 +543,14 @@ const Scope &SemanticsContext::GetCUDABuiltinsScope() {
return **cudaBuiltinsScope_;
}
+const Scope &SemanticsContext::GetCUDADeviceScope() {
+ if (!cudaDeviceScope_) {
+ cudaDeviceScope_ = GetBuiltinModule("cudadevice");
+ CHECK(cudaDeviceScope_.value() != nullptr);
+ }
+ return **cudaDeviceScope_;
+}
+
void SemanticsContext::UsePPCBuiltinsModule() {
if (ppcBuiltinsScope_ == nullptr) {
ppcBuiltinsScope_ = GetBuiltinModule("__ppc_intrinsics");
diff --git a/flang/module/__cuda_device_builtins.f90 b/flang/module/__cuda_device_builtins.f90
new file mode 100644
index 0000000000000..738dc97242f2b
--- /dev/null
+++ b/flang/module/__cuda_device_builtins.f90
@@ -0,0 +1,74 @@
+!===-- module/__cuda_device_builtins.f90 -----------------------------------===!
+!
+! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+! See https://llvm.org/LICENSE.txt for license information.
+! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+!
+!===------------------------------------------------------------------------===!
+
+! CUDA Fortran procedures available in device subprograms
+
+module __CUDA_device_builtins
+
+ implicit none
+
+ ! Set PRIVATE by default to explicitly only export what is meant
+ ! to be exported by this MODULE.
+ private
+
+ ! Synchronization Functions
+
+ interface
+ subroutine __cuda_device_builtins_syncthreads()
+ end subroutine
+ end interface
+ public :: __cuda_device_builtins_syncthreads
+
+ interface
+ integer function __cuda_device_builtins_syncthreads_and(value)
+ integer :: value
+ end function
+ end interface
+ public :: __cuda_device_builtins_syncthreads_and
+
+ interface
+ integer function __cuda_device_builtins_syncthreads_count(value)
+ integer :: value
+ end function
+ end interface
+ public :: __cuda_device_builtins_syncthreads_count
+
+ interface
+ integer function __cuda_device_builtins_syncthreads_or(int_value)
+ end function
+ end interface
+ public :: __cuda_device_builtins_syncthreads_or
+
+ interface
+ subroutine __cuda_device_builtins_syncwarp(mask)
+ integer :: mask
+ end subroutine
+ end interface
+ public :: __cuda_device_builtins_syncwarp
+
+ ! Memory Fences
+
+ interface
+ subroutine __cuda_device_builtins_threadfence()
+ end subroutine
+ end interface
+ public :: __cuda_device_builtins_threadfence
+
+ interface
+ subroutine __cuda_device_builtins_threadfence_block()
+ end subroutine
+ end interface
+ public :: __cuda_device_builtins_threadfence_block
+
+ interface
+ subroutine __cuda_device_builtins_threadfence_system()
+ end subroutine
+ end interface
+ public :: __cuda_device_builtins_threadfence_system
+
+end module
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
new file mode 100644
index 0000000000000..b635d77ea4529
--- /dev/null
+++ b/flang/module/cudadevice.f90
@@ -0,0 +1,21 @@
+!===-- module/cudadevice.f90 -----------------------------------------------===!
+!
+! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+! See https://llvm.org/LICENSE.txt for license information.
+! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+!
+!===------------------------------------------------------------------------===!
+
+! CUDA Fortran procedures available in device subprograms
+
+module cudadevice
+ use __cuda_device_builtins, only: &
+ syncthreads => __cuda_device_builtins_syncthreads, &
+ syncthreads_and => __cuda_device_builtins_syncthreads_and, &
+ syncthreads_count => __cuda_device_builtins_syncthreads_count, &
+ syncthreads_or => __cuda_device_builtins_syncthreads_or, &
+ syncwarp => __cuda_device_builtins_syncwarp, &
+ threadfence => __cuda_device_builtins_threadfence, &
+ threadfence_block => __cuda_device_builtins_threadfence_block, &
+ threadfence_system => __cuda_device_builtins_threadfence_system
+end module
diff --git a/flang/test/Semantics/cuf-device-procedures.cuf b/flang/test/Semantics/cuf-device-procedures.cuf
new file mode 100644
index 0000000000000..e79423e3587a1
--- /dev/null
+++ b/flang/test/Semantics/cuf-device-procedures.cuf
@@ -0,0 +1,35 @@
+! RUN: %flang_fc1 -fdebug-dump-symbols %s | FileCheck %s
+
+! Test that CUDA Fortran device intrinsics pass semantic analysis
+
+attributes(global) subroutine devsub()
+ implicit none
+ integer :: ret
+
+ ! 3.6.4. Synchronization Functions
+ call syncthreads()
+ call syncwarp(1)
+ call threadfence()
+ call threadfence_block()
+ call threadfence_system()
+ ret = syncthreads_and(1)
+ ret = syncthreads_count(1)
+ ret = syncthreads_or(1)
+end
+
+! CHECK-LABEL: Subprogram scope: devsub
+! CHECK: syncthreads, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_syncthreads in __cuda_device_builtins
+! CHECK: syncthreads_and, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_and in __cuda_device_builtins
+! CHECK: syncthreads_count, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_count in __cuda_device_builtins
+! CHECK: syncthreads_or, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_or in __cuda_device_builtins
+! CHECK: syncwarp, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_syncwarp in __cuda_device_builtins
+! CHECK: threadfence, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence in __cuda_device_builtins
+! CHECK: threadfence_block, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence_block in __cuda_device_builtins
+! CHECK: threadfence_system, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence_system in __cuda_device_builtins
+
+subroutine host()
+ call syncthreads()
+end subroutine
+
+! CHECK-LABEL: Subprogram scope: host
+! CHECK: syncthreads, EXTERNAL: HostAssoc{{$}}
diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt
index 64815a1f5da62..e5cf945d1f118 100644
--- a/flang/tools/f18/CMakeLists.txt
+++ b/flang/tools/f18/CMakeLists.txt
@@ -12,6 +12,8 @@ set(MODULES
"__ppc_intrinsics"
"mma"
"__cuda_builtins"
+ "__cuda_device_builtins"
+ "cudadevice"
"ieee_arithmetic"
"ieee_exceptions"
"ieee_features"
More information about the flang-commits
mailing list