[flang-commits] [flang] 6d50a79 - [flang][cuda] Implicitly load cudadevice module in device/global subprogram (#92038)
via flang-commits
flang-commits at lists.llvm.org
Fri May 17 13:58:52 PDT 2024
Author: Valentin Clement (バレンタイン クレメン)
Date: 2024-05-17T13:58:49-07:00
New Revision: 6d50a79b721760fa5e10b81ebe6d1e37f141f61b
URL: https://github.com/llvm/llvm-project/commit/6d50a79b721760fa5e10b81ebe6d1e37f141f61b
DIFF: https://github.com/llvm/llvm-project/commit/6d50a79b721760fa5e10b81ebe6d1e37f141f61b.diff
LOG: [flang][cuda] Implicitly load cudadevice module in device/global subprogram (#92038)
This is a re-worked version of #91668. It adds the `cudadevice` module
and set the `device` attributes on its functions/subroutines so there is
no need for special case in semantic check.
`cudadevice` module is implicitly USE'd in `global`/`device` subprogram.
Added:
flang/module/cudadevice.f90
flang/test/Semantics/cuf-device-procedures01.cuf
flang/test/Semantics/cuf-device-procedures02.cuf
Modified:
flang/include/flang/Semantics/semantics.h
flang/lib/Semantics/resolve-names.cpp
flang/lib/Semantics/semantics.cpp
flang/tools/f18/CMakeLists.txt
Removed:
################################################################################
diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h
index e6ba71d53e92b..167e613816394 100644
--- a/flang/include/flang/Semantics/semantics.h
+++ b/flang/include/flang/Semantics/semantics.h
@@ -215,8 +215,10 @@ class SemanticsContext {
void UseFortranBuiltinsModule();
const Scope *GetBuiltinsScope() const { return builtinsScope_; }
- void UsePPCBuiltinTypesModule();
const Scope &GetCUDABuiltinsScope();
+ const Scope &GetCUDADeviceScope();
+
+ void UsePPCBuiltinTypesModule();
void UsePPCBuiltinsModule();
Scope *GetPPCBuiltinTypesScope() { return ppcBuiltinTypesScope_; }
const Scope *GetPPCBuiltinsScope() const { return ppcBuiltinsScope_; }
@@ -292,6 +294,7 @@ class SemanticsContext {
const Scope *builtinsScope_{nullptr}; // module __Fortran_builtins
Scope *ppcBuiltinTypesScope_{nullptr}; // module __Fortran_PPC_types
std::optional<const Scope *> cudaBuiltinsScope_; // module __CUDA_builtins
+ std::optional<const Scope *> cudaDeviceScope_; // module cudadevice
const Scope *ppcBuiltinsScope_{nullptr}; // module __ppc_intrinsics
std::list<parser::Program> modFileParseTrees_;
std::unique_ptr<CommonBlockMap> commonBlockMap_;
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index 40eee89de131a..a46c0f378d5d0 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -3797,6 +3797,26 @@ bool SubprogramVisitor::Pre(const parser::PrefixSpec::Attributes &attrs) {
subp->set_cudaSubprogramAttrs(attr);
}
}
+ if (auto attrs{subp->cudaSubprogramAttrs()}) {
+ if (*attrs == common::CUDASubprogramAttrs::Global ||
+ *attrs == common::CUDASubprogramAttrs::Device) {
+ const Scope &scope{currScope()};
+ const Scope *mod{FindModuleContaining(scope)};
+ if (mod && mod->GetName().value() == "cudadevice") {
+ return false;
+ }
+ // Implicitly USE the cudadevice module by copying its symbols in the
+ // current scope.
+ const Scope &cudaDeviceScope{context().GetCUDADeviceScope()};
+ for (auto sym : cudaDeviceScope.GetSymbols()) {
+ if (!currScope().FindSymbol(sym->name())) {
+ auto &localSymbol{MakeSymbol(
+ sym->name(), Attrs{}, UseDetails{sym->name(), *sym})};
+ localSymbol.flags() = sym->flags();
+ }
+ }
+ }
+ }
}
return false;
}
diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp
index 6ccd915c4dcbf..d51cc62d804e8 100644
--- a/flang/lib/Semantics/semantics.cpp
+++ b/flang/lib/Semantics/semantics.cpp
@@ -543,6 +543,14 @@ const Scope &SemanticsContext::GetCUDABuiltinsScope() {
return **cudaBuiltinsScope_;
}
+const Scope &SemanticsContext::GetCUDADeviceScope() {
+ if (!cudaDeviceScope_) {
+ cudaDeviceScope_ = GetBuiltinModule("cudadevice");
+ CHECK(cudaDeviceScope_.value() != nullptr);
+ }
+ return **cudaDeviceScope_;
+}
+
void SemanticsContext::UsePPCBuiltinsModule() {
if (ppcBuiltinsScope_ == nullptr) {
ppcBuiltinsScope_ = GetBuiltinModule("__ppc_intrinsics");
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
new file mode 100644
index 0000000000000..f34820dd10792
--- /dev/null
+++ b/flang/module/cudadevice.f90
@@ -0,0 +1,74 @@
+!===-- module/cudedevice.f90 -----------------------------------------------===!
+!
+! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+! See https://llvm.org/LICENSE.txt for license information.
+! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+!
+!===------------------------------------------------------------------------===!
+
+! CUDA Fortran procedures available in device subprogram
+
+module cudadevice
+implicit none
+
+ ! Set PRIVATE by default to explicitly only export what is meant
+ ! to be exported by this MODULE.
+ private
+
+ ! Synchronization Functions
+
+ interface
+ attributes(device) subroutine syncthreads()
+ end subroutine
+ end interface
+ public :: syncthreads
+
+ interface
+ attributes(device) integer function syncthreads_and(value)
+ integer :: value
+ end function
+ end interface
+ public :: syncthreads_and
+
+ interface
+ attributes(device) integer function syncthreads_count(value)
+ integer :: value
+ end function
+ end interface
+ public :: syncthreads_count
+
+ interface
+ attributes(device) integer function syncthreads_or(value)
+ integer :: value
+ end function
+ end interface
+ public :: syncthreads_or
+
+ interface
+ attributes(device) subroutine syncwarp(mask)
+ integer :: mask
+ end subroutine
+ end interface
+ public :: syncwarp
+
+ ! Memory Fences
+
+ interface
+ attributes(device) subroutine threadfence()
+ end subroutine
+ end interface
+ public :: threadfence
+
+ interface
+ attributes(device) subroutine threadfence_block()
+ end subroutine
+ end interface
+ public :: threadfence_block
+
+ interface
+ attributes(device) subroutine threadfence_system()
+ end subroutine
+ end interface
+ public :: threadfence_system
+
+end module
diff --git a/flang/test/Semantics/cuf-device-procedures01.cuf b/flang/test/Semantics/cuf-device-procedures01.cuf
new file mode 100644
index 0000000000000..b9918d8a4ae4c
--- /dev/null
+++ b/flang/test/Semantics/cuf-device-procedures01.cuf
@@ -0,0 +1,35 @@
+! RUN: %flang_fc1 -fdebug-dump-symbols %s | FileCheck %s
+
+! Test CUDA Fortran intrinsic can pass semantic
+
+attributes(global) subroutine devsub()
+ implicit none
+ integer :: ret
+
+ ! 3.6.4. Synchronization Functions
+ call syncthreads()
+ call syncwarp(1)
+ call threadfence()
+ call threadfence_block()
+ call threadfence_system()
+ ret = syncthreads_and(1)
+ ret = syncthreads_count(1)
+ ret = syncthreads_or(1)
+end
+
+! CHECK-LABEL: Subprogram scope: devsub
+! CHECK: syncthreads (Subroutine): Use from syncthreads in cudadevice
+! CHECK: syncthreads_and (Function): Use from syncthreads_and in cudadevice
+! CHECK: syncthreads_count (Function): Use from syncthreads_count in cudadevice
+! CHECK: syncthreads_or (Function): Use from syncthreads_or in cudadevice
+! CHECK: syncwarp (Subroutine): Use from syncwarp in cudadevice
+! CHECK: threadfence (Subroutine): Use from threadfence in cudadevice
+! CHECK: threadfence_block (Subroutine): Use from threadfence_block in cudadevice
+! CHECK: threadfence_system (Subroutine): Use from threadfence_system in cudadevice
+
+subroutine host()
+ call syncthreads()
+end subroutine
+
+! CHECK-LABEL: Subprogram scope: host
+! CHECK: syncthreads, EXTERNAL: HostAssoc{{$}}
diff --git a/flang/test/Semantics/cuf-device-procedures02.cuf b/flang/test/Semantics/cuf-device-procedures02.cuf
new file mode 100644
index 0000000000000..c93fc4033b8f0
--- /dev/null
+++ b/flang/test/Semantics/cuf-device-procedures02.cuf
@@ -0,0 +1,17 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1
+
+module dev
+ integer, device :: syncthreads
+
+contains
+
+ attributes(device) subroutine sub1()
+ syncthreads = 1 ! syncthreads not overwritten by cudadevice
+ end subroutine
+
+ attributes(global) subroutine sub2()
+!ERROR: 'threadfence' is use-associated from module 'cudadevice' and cannot be re-declared
+ integer :: threadfence
+ end subroutine
+end module
+
diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt
index 64815a1f5da62..35e1cdafd3ae3 100644
--- a/flang/tools/f18/CMakeLists.txt
+++ b/flang/tools/f18/CMakeLists.txt
@@ -12,6 +12,7 @@ set(MODULES
"__ppc_intrinsics"
"mma"
"__cuda_builtins"
+ "cudadevice"
"ieee_arithmetic"
"ieee_exceptions"
"ieee_features"
@@ -26,11 +27,15 @@ set(MODULES
if (NOT CMAKE_CROSSCOMPILING)
foreach(filename ${MODULES})
set(depends "")
+ set(opts "")
if(${filename} STREQUAL "__fortran_builtins" OR
${filename} STREQUAL "__ppc_types")
elseif(${filename} STREQUAL "__ppc_intrinsics" OR
${filename} STREQUAL "mma")
set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod)
+ elseif(${filename} STREQUAL "cudadevice")
+ set(opts -fc1 -xcuda)
+ set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_builtins.mod)
else()
set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod)
if(NOT ${filename} STREQUAL "__fortran_type_info")
@@ -43,9 +48,8 @@ if (NOT CMAKE_CROSSCOMPILING)
endif()
# The module contains PPC vector types that needs the PPC target.
- set(opts "")
- if(${filename} STREQUAL "__ppc_intrinsics" OR
- ${filename} STREQUAL "mma")
+ if(${filename} STREQUAL "__ppc_intrinsics" OR
+ ${filename} STREQUAL "mma")
if (PowerPC IN_LIST LLVM_TARGETS_TO_BUILD)
set(opts "--target=ppc64le")
else()
@@ -58,7 +62,7 @@ if (NOT CMAKE_CROSSCOMPILING)
# TODO: We may need to flag this with conditional, in case Flang is built w/o OpenMP support
add_custom_command(OUTPUT ${base}.mod
COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR}
- COMMAND flang-new -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR}
+ COMMAND flang-new ${opts} -cpp -fsyntax-only -module-dir ${FLANG_INTRINSIC_MODULES_DIR}
${FLANG_SOURCE_DIR}/module/${filename}.f90
DEPENDS flang-new ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${FLANG_SOURCE_DIR}/module/__fortran_builtins.f90 ${depends}
)
More information about the flang-commits
mailing list