[clang] [OpenMP] Introduce -fopenmp-force-usm flag (PR #75468)
Jan Patrick Lehr via cfe-commits
cfe-commits at lists.llvm.org
Fri Dec 29 08:15:39 PST 2023
https://github.com/jplehr updated https://github.com/llvm/llvm-project/pull/75468
>From 8f381c760fca8a4abd7550c492ff22fa8972933a Mon Sep 17 00:00:00 2001
From: JP Lehr <JanPatrick.Lehr at amd.com>
Date: Thu, 6 Jul 2023 16:47:21 -0400
Subject: [PATCH 1/3] [OpenMP] Introduce -fopenmp-force-usm flag
The new flag implements logic to include #pragma omp requires
unified_shared_memory in every translation unit.
This provides a straightforward way to enable USM for an application
without modifying its sources.
---
clang/include/clang/Driver/Options.td | 2 ++
clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp | 16 ++++++++++++++++
clang/lib/Headers/CMakeLists.txt | 1 +
.../lib/Headers/openmp_wrappers/usm/force_usm.h | 6 ++++++
4 files changed, 25 insertions(+)
create mode 100644 clang/lib/Headers/openmp_wrappers/usm/force_usm.h
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 2b93ddf033499c..e33bc7d1b10d71 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3381,6 +3381,8 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], "fopenmp-cuda-blocks-per-sm=">
Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams-reduction-recs-num=">, Group<f_Group>,
Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
+def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group<f_Group>,
+ Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
//===----------------------------------------------------------------------===//
// Shared cc1 + fc1 OpenMP Target Options
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index b012b7cb729378..a077f2f06d7728 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -129,6 +129,22 @@ AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const {
void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
const ArgList &DriverArgs, ArgStringList &CC1Args) const {
HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
+
+ CC1Args.push_back("-internal-isystem");
+ SmallString<128> P(HostTC.getDriver().ResourceDir);
+ llvm::sys::path::append(P, "include/cuda_wrappers");
+ CC1Args.push_back(DriverArgs.MakeArgString(P));
+
+ // Force USM mode will forcefully include #pragma omp requires
+ // unified_shared_memory via the force_usm header
+ // XXX This may result in a compilation error if the source
+ // file already includes that pragma.
+ if (DriverArgs.hasArg(options::OPT_fopenmp_force_usm)) {
+ CC1Args.push_back("-include");
+ CC1Args.push_back(
+ DriverArgs.MakeArgString(HostTC.getDriver().ResourceDir +
+ "/include/openmp_wrappers/force_usm.h"));
+ }
}
void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 735e4e4e3be89b..ed491779abcd00 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -320,6 +320,7 @@ set(openmp_wrapper_files
openmp_wrappers/__clang_openmp_device_functions.h
openmp_wrappers/complex_cmath.h
openmp_wrappers/new
+ openmp_wrappers/usm/force_usm.h
)
set(llvm_libc_wrapper_files
diff --git a/clang/lib/Headers/openmp_wrappers/usm/force_usm.h b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
new file mode 100644
index 00000000000000..15c394e27ce9c2
--- /dev/null
+++ b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
@@ -0,0 +1,6 @@
+#ifndef __CLANG_FORCE_OPENMP_USM
+#define __CLANG_FORCE_OPENMP_USM
+
+#pragma omp requires unified_shared_memory
+
+#endif
>From 4d5a1f670b3bdd5b183515e347610414cb12cb90 Mon Sep 17 00:00:00 2001
From: JP Lehr <JanPatrick.Lehr at amd.com>
Date: Fri, 29 Dec 2023 04:33:19 -0500
Subject: [PATCH 2/3] Revert "[OpenMP] Introduce -fopenmp-force-usm flag"
This reverts commit 4ecd07d786a5a994b33b9177d4e21d839bfe3fc9.
The revert is done in order to test the alternative solution.
---
clang/include/clang/Driver/Options.td | 2 --
clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp | 16 ----------------
clang/lib/Headers/CMakeLists.txt | 1 -
.../lib/Headers/openmp_wrappers/usm/force_usm.h | 6 ------
4 files changed, 25 deletions(-)
delete mode 100644 clang/lib/Headers/openmp_wrappers/usm/force_usm.h
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index e33bc7d1b10d71..2b93ddf033499c 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3381,8 +3381,6 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], "fopenmp-cuda-blocks-per-sm=">
Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams-reduction-recs-num=">, Group<f_Group>,
Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
-def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group<f_Group>,
- Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
//===----------------------------------------------------------------------===//
// Shared cc1 + fc1 OpenMP Target Options
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index a077f2f06d7728..b012b7cb729378 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -129,22 +129,6 @@ AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const {
void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
const ArgList &DriverArgs, ArgStringList &CC1Args) const {
HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
-
- CC1Args.push_back("-internal-isystem");
- SmallString<128> P(HostTC.getDriver().ResourceDir);
- llvm::sys::path::append(P, "include/cuda_wrappers");
- CC1Args.push_back(DriverArgs.MakeArgString(P));
-
- // Force USM mode will forcefully include #pragma omp requires
- // unified_shared_memory via the force_usm header
- // XXX This may result in a compilation error if the source
- // file already includes that pragma.
- if (DriverArgs.hasArg(options::OPT_fopenmp_force_usm)) {
- CC1Args.push_back("-include");
- CC1Args.push_back(
- DriverArgs.MakeArgString(HostTC.getDriver().ResourceDir +
- "/include/openmp_wrappers/force_usm.h"));
- }
}
void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index ed491779abcd00..735e4e4e3be89b 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -320,7 +320,6 @@ set(openmp_wrapper_files
openmp_wrappers/__clang_openmp_device_functions.h
openmp_wrappers/complex_cmath.h
openmp_wrappers/new
- openmp_wrappers/usm/force_usm.h
)
set(llvm_libc_wrapper_files
diff --git a/clang/lib/Headers/openmp_wrappers/usm/force_usm.h b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
deleted file mode 100644
index 15c394e27ce9c2..00000000000000
--- a/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __CLANG_FORCE_OPENMP_USM
-#define __CLANG_FORCE_OPENMP_USM
-
-#pragma omp requires unified_shared_memory
-
-#endif
>From f0aaefbe923d2daa1752f3a9664dab3958346c51 Mon Sep 17 00:00:00 2001
From: JP Lehr <JanPatrick.Lehr at amd.com>
Date: Fri, 29 Dec 2023 04:32:24 -0500
Subject: [PATCH 3/3] [OpenMP] Implicit USM Clause Solution
This uses an implicitly added OpenMP USM clause when initializing Sema
to enforce the use of USM.
---
clang/include/clang/Basic/LangOptions.def | 1 +
clang/include/clang/Driver/Options.td | 4 ++++
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 6 ++++++
clang/lib/Driver/ToolChains/Clang.cpp | 2 ++
4 files changed, 13 insertions(+)
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index 21abc346cf17ac..81cf2ad9498a7f 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -260,6 +260,7 @@ LANGOPT(OpenMPTeamSubscription , 1, 0, "Assume distributed loops do not have mo
LANGOPT(OpenMPNoThreadState , 1, 0, "Assume that no thread in a parallel region will modify an ICV.")
LANGOPT(OpenMPNoNestedParallelism , 1, 0, "Assume that no thread in a parallel region will encounter a parallel region")
LANGOPT(OpenMPOffloadMandatory , 1, 0, "Assert that offloading is mandatory and do not create a host fallback.")
+LANGOPT(OpenMPForceUSM , 1, 0, "Enable OpenMP unified shared memory mode via compiler.")
LANGOPT(NoGPULib , 1, 0, "Indicate a build without the standard GPU libraries.")
LANGOPT(RenderScript , 1, 0, "RenderScript")
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 2b93ddf033499c..28290da438c62d 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3451,6 +3451,10 @@ def fopenmp_offload_mandatory : Flag<["-"], "fopenmp-offload-mandatory">, Group<
Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option]>,
HelpText<"Do not create a host fallback if offloading to the device fails.">,
MarshallingInfoFlag<LangOpts<"OpenMPOffloadMandatory">>;
+def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group<f_Group>,
+ Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option]>,
+ HelpText<"Force behvaior as if the user specified pragma omp requires unified_shared_memory.">,
+ MarshallingInfoFlag<LangOpts<"OpenMPForceUSM">>;
def fopenmp_target_jit : Flag<["-"], "fopenmp-target-jit">, Group<f_Group>,
Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CLOption]>,
HelpText<"Emit code that can be JIT compiled for OpenMP offloading. Implies -foffload-lto=full">;
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index ea6645a39e8321..09204c30175f64 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1044,6 +1044,12 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
? CGM.getLangOpts().OMPHostIRFile
: StringRef{});
OMPBuilder.setConfig(Config);
+
+ // The user forces the compiler to behave as if omp requires unified_shared_memory was given.
+ if (CGM.getLangOpts().OpenMPForceUSM) {
+ HasRequiresUnifiedSharedMemory = true;
+ OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
+ }
}
void CGOpenMPRuntime::clear() {
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index acfa119805068d..ffc24201ab2e0b 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6382,6 +6382,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism");
if (Args.hasArg(options::OPT_fopenmp_offload_mandatory))
CmdArgs.push_back("-fopenmp-offload-mandatory");
+ if (Args.hasArg(options::OPT_fopenmp_force_usm))
+ CmdArgs.push_back("-fopenmp-force-usm");
break;
default:
// By default, if Clang doesn't know how to generate useful OpenMP code
More information about the cfe-commits
mailing list