[clang] [OpenMP] Introduce -fopenmp-force-usm flag (PR #75468)
Jan Patrick Lehr via cfe-commits
cfe-commits at lists.llvm.org
Fri Dec 15 01:24:11 PST 2023
https://github.com/jplehr updated https://github.com/llvm/llvm-project/pull/75468
>From 4ecd07d786a5a994b33b9177d4e21d839bfe3fc9 Mon Sep 17 00:00:00 2001
From: JP Lehr <JanPatrick.Lehr at amd.com>
Date: Thu, 6 Jul 2023 16:47:21 -0400
Subject: [PATCH] [OpenMP] Introduce -fopenmp-force-usm flag
The new flag force-includes a header containing #pragma omp requires
unified_shared_memory into every translation unit.
This provides a straightforward way to enable USM for an application
without modifying its sources.
---
clang/include/clang/Driver/Options.td | 2 ++
clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp | 16 ++++++++++++++++
clang/lib/Headers/CMakeLists.txt | 1 +
.../lib/Headers/openmp_wrappers/usm/force_usm.h | 6 ++++++
4 files changed, 25 insertions(+)
create mode 100644 clang/lib/Headers/openmp_wrappers/usm/force_usm.h
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 1b02087425b751..73325d5620cc10 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3381,6 +3381,8 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], "fopenmp-cuda-blocks-per-sm=">
Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams-reduction-recs-num=">, Group<f_Group>,
Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
+def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group<f_Group>,
+ Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
//===----------------------------------------------------------------------===//
// Shared cc1 + fc1 OpenMP Target Options
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index b012b7cb729378..a077f2f06d7728 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -129,6 +129,22 @@ AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const {
void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
const ArgList &DriverArgs, ArgStringList &CC1Args) const {
HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
+
+ CC1Args.push_back("-internal-isystem");
+ SmallString<128> P(HostTC.getDriver().ResourceDir);
+ llvm::sys::path::append(P, "include/cuda_wrappers");
+ CC1Args.push_back(DriverArgs.MakeArgString(P));
+
+ // -fopenmp-force-usm pre-includes openmp_wrappers/usm/force_usm.h, which
+ // holds '#pragma omp requires unified_shared_memory'. The path must match
+ // the entry in clang/lib/Headers/CMakeLists.txt. XXX This may result in a
+ // compilation error if the source file already includes that pragma.
+ if (DriverArgs.hasArg(options::OPT_fopenmp_force_usm)) {
+ CC1Args.push_back("-include");
+ CC1Args.push_back(
+ DriverArgs.MakeArgString(HostTC.getDriver().ResourceDir +
+ "/include/openmp_wrappers/usm/force_usm.h"));
+ }
}
void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index f8fdd402777e48..aac232fa8b4405 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -319,6 +319,7 @@ set(openmp_wrapper_files
openmp_wrappers/__clang_openmp_device_functions.h
openmp_wrappers/complex_cmath.h
openmp_wrappers/new
+ openmp_wrappers/usm/force_usm.h
)
set(llvm_libc_wrapper_files
diff --git a/clang/lib/Headers/openmp_wrappers/usm/force_usm.h b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
new file mode 100644
index 00000000000000..15c394e27ce9c2
--- /dev/null
+++ b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
@@ -0,0 +1,6 @@
+#ifndef __CLANG_FORCE_OPENMP_USM
+#define __CLANG_FORCE_OPENMP_USM
+
+#pragma omp requires unified_shared_memory
+
+#endif
More information about the cfe-commits
mailing list