[clang] [OpenMP] Introduce -fopenmp-force-usm flag (PR #75468)

Jan Patrick Lehr via cfe-commits cfe-commits at lists.llvm.org
Fri Dec 29 08:15:39 PST 2023


https://github.com/jplehr updated https://github.com/llvm/llvm-project/pull/75468

>From 8f381c760fca8a4abd7550c492ff22fa8972933a Mon Sep 17 00:00:00 2001
From: JP Lehr <JanPatrick.Lehr at amd.com>
Date: Thu, 6 Jul 2023 16:47:21 -0400
Subject: [PATCH 1/3] [OpenMP] Introduce -fopenmp-force-usm flag

The new flag force-includes a wrapper header containing
`#pragma omp requires unified_shared_memory` into every translation unit.
This provides a straightforward way to enable unified shared memory (USM)
for an application without modifying its sources.
---
 clang/include/clang/Driver/Options.td            |  2 ++
 clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp     | 16 ++++++++++++++++
 clang/lib/Headers/CMakeLists.txt                 |  1 +
 .../lib/Headers/openmp_wrappers/usm/force_usm.h  |  6 ++++++
 4 files changed, 25 insertions(+)
 create mode 100644 clang/lib/Headers/openmp_wrappers/usm/force_usm.h

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 2b93ddf033499c..e33bc7d1b10d71 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3381,6 +3381,8 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], "fopenmp-cuda-blocks-per-sm=">
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
 def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams-reduction-recs-num=">, Group<f_Group>,
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
+def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group<f_Group>,
+  Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
 
 //===----------------------------------------------------------------------===//
 // Shared cc1 + fc1 OpenMP Target Options
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index b012b7cb729378..a077f2f06d7728 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -129,6 +129,22 @@ AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const {
 void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
     const ArgList &DriverArgs, ArgStringList &CC1Args) const {
   HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
+
+  CC1Args.push_back("-internal-isystem");
+  SmallString<128> P(HostTC.getDriver().ResourceDir);
+  llvm::sys::path::append(P, "include/cuda_wrappers");
+  CC1Args.push_back(DriverArgs.MakeArgString(P));
+
+  // Force USM mode will forcefully include #pragma omp requires
+  // unified_shared_memory via the force_usm header
+  // XXX This may result in a compilation error if the source
+  // file already includes that pragma.
+  if (DriverArgs.hasArg(options::OPT_fopenmp_force_usm)) {
+    CC1Args.push_back("-include");
+    CC1Args.push_back(
+        DriverArgs.MakeArgString(HostTC.getDriver().ResourceDir +
+                                 "/include/openmp_wrappers/force_usm.h"));
+  }
 }
 
 void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 735e4e4e3be89b..ed491779abcd00 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -320,6 +320,7 @@ set(openmp_wrapper_files
   openmp_wrappers/__clang_openmp_device_functions.h
   openmp_wrappers/complex_cmath.h
   openmp_wrappers/new
+  openmp_wrappers/usm/force_usm.h
 )
 
 set(llvm_libc_wrapper_files
diff --git a/clang/lib/Headers/openmp_wrappers/usm/force_usm.h b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
new file mode 100644
index 00000000000000..15c394e27ce9c2
--- /dev/null
+++ b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
@@ -0,0 +1,6 @@
+#ifndef __CLANG_FORCE_OPENMP_USM
+#define __CLANG_FORCE_OPENMP_USM
+
+#pragma omp requires unified_shared_memory
+
+#endif

>From 4d5a1f670b3bdd5b183515e347610414cb12cb90 Mon Sep 17 00:00:00 2001
From: JP Lehr <JanPatrick.Lehr at amd.com>
Date: Fri, 29 Dec 2023 04:33:19 -0500
Subject: [PATCH 2/3] Revert "[OpenMP] Introduce -fopenmp-force-usm flag"

This reverts commit 4ecd07d786a5a994b33b9177d4e21d839bfe3fc9.

The header-based approach is reverted in order to test the alternative
solution.
---
 clang/include/clang/Driver/Options.td            |  2 --
 clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp     | 16 ----------------
 clang/lib/Headers/CMakeLists.txt                 |  1 -
 .../lib/Headers/openmp_wrappers/usm/force_usm.h  |  6 ------
 4 files changed, 25 deletions(-)
 delete mode 100644 clang/lib/Headers/openmp_wrappers/usm/force_usm.h

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index e33bc7d1b10d71..2b93ddf033499c 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3381,8 +3381,6 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], "fopenmp-cuda-blocks-per-sm=">
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
 def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams-reduction-recs-num=">, Group<f_Group>,
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
-def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group<f_Group>,
-  Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
 
 //===----------------------------------------------------------------------===//
 // Shared cc1 + fc1 OpenMP Target Options
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index a077f2f06d7728..b012b7cb729378 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -129,22 +129,6 @@ AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const {
 void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
     const ArgList &DriverArgs, ArgStringList &CC1Args) const {
   HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
-
-  CC1Args.push_back("-internal-isystem");
-  SmallString<128> P(HostTC.getDriver().ResourceDir);
-  llvm::sys::path::append(P, "include/cuda_wrappers");
-  CC1Args.push_back(DriverArgs.MakeArgString(P));
-
-  // Force USM mode will forcefully include #pragma omp requires
-  // unified_shared_memory via the force_usm header
-  // XXX This may result in a compilation error if the source
-  // file already includes that pragma.
-  if (DriverArgs.hasArg(options::OPT_fopenmp_force_usm)) {
-    CC1Args.push_back("-include");
-    CC1Args.push_back(
-        DriverArgs.MakeArgString(HostTC.getDriver().ResourceDir +
-                                 "/include/openmp_wrappers/force_usm.h"));
-  }
 }
 
 void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index ed491779abcd00..735e4e4e3be89b 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -320,7 +320,6 @@ set(openmp_wrapper_files
   openmp_wrappers/__clang_openmp_device_functions.h
   openmp_wrappers/complex_cmath.h
   openmp_wrappers/new
-  openmp_wrappers/usm/force_usm.h
 )
 
 set(llvm_libc_wrapper_files
diff --git a/clang/lib/Headers/openmp_wrappers/usm/force_usm.h b/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
deleted file mode 100644
index 15c394e27ce9c2..00000000000000
--- a/clang/lib/Headers/openmp_wrappers/usm/force_usm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __CLANG_FORCE_OPENMP_USM
-#define __CLANG_FORCE_OPENMP_USM
-
-#pragma omp requires unified_shared_memory
-
-#endif

>From f0aaefbe923d2daa1752f3a9664dab3958346c51 Mon Sep 17 00:00:00 2001
From: JP Lehr <JanPatrick.Lehr at amd.com>
Date: Fri, 29 Dec 2023 04:32:24 -0500
Subject: [PATCH 3/3] [OpenMP] Implicit USM Clause Solution

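This enforces USM by making the compiler behave as if the OpenMP
`requires unified_shared_memory` clause had been given implicitly.
The new -fopenmp-force-usm flag sets the OpenMPForceUSM language option,
which is checked when the OpenMP CodeGen runtime is constructed.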
---
 clang/include/clang/Basic/LangOptions.def | 1 +
 clang/include/clang/Driver/Options.td     | 4 ++++
 clang/lib/CodeGen/CGOpenMPRuntime.cpp     | 6 ++++++
 clang/lib/Driver/ToolChains/Clang.cpp     | 2 ++
 4 files changed, 13 insertions(+)

diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index 21abc346cf17ac..81cf2ad9498a7f 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -260,6 +260,7 @@ LANGOPT(OpenMPTeamSubscription  , 1, 0, "Assume distributed loops do not have mo
 LANGOPT(OpenMPNoThreadState  , 1, 0, "Assume that no thread in a parallel region will modify an ICV.")
 LANGOPT(OpenMPNoNestedParallelism  , 1, 0, "Assume that no thread in a parallel region will encounter a parallel region")
 LANGOPT(OpenMPOffloadMandatory  , 1, 0, "Assert that offloading is mandatory and do not create a host fallback.")
+LANGOPT(OpenMPForceUSM     , 1, 0, "Enable OpenMP unified shared memory mode via compiler.")
 LANGOPT(NoGPULib  , 1, 0, "Indicate a build without the standard GPU libraries.")
 LANGOPT(RenderScript      , 1, 0, "RenderScript")
 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 2b93ddf033499c..28290da438c62d 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3451,6 +3451,10 @@ def fopenmp_offload_mandatory : Flag<["-"], "fopenmp-offload-mandatory">, Group<
   Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option]>,
   HelpText<"Do not create a host fallback if offloading to the device fails.">,
   MarshallingInfoFlag<LangOpts<"OpenMPOffloadMandatory">>;
+def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group<f_Group>,
+  Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option]>,
+  HelpText<"Force behavior as if the user specified pragma omp requires unified_shared_memory.">,
+  MarshallingInfoFlag<LangOpts<"OpenMPForceUSM">>;
 def fopenmp_target_jit : Flag<["-"], "fopenmp-target-jit">, Group<f_Group>,
   Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CLOption]>,
   HelpText<"Emit code that can be JIT compiled for OpenMP offloading. Implies -foffload-lto=full">;
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index ea6645a39e8321..09204c30175f64 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1044,6 +1044,12 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
                                          ? CGM.getLangOpts().OMPHostIRFile
                                          : StringRef{});
   OMPBuilder.setConfig(Config);
+
+  // The user forces the compiler to behave as if omp requires unified_shared_memory was given.
+  if (CGM.getLangOpts().OpenMPForceUSM) {
+    HasRequiresUnifiedSharedMemory = true;
+    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
+  }
 }
 
 void CGOpenMPRuntime::clear() {
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index acfa119805068d..ffc24201ab2e0b 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6382,6 +6382,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
         CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism");
       if (Args.hasArg(options::OPT_fopenmp_offload_mandatory))
         CmdArgs.push_back("-fopenmp-offload-mandatory");
+      if (Args.hasArg(options::OPT_fopenmp_force_usm))
+        CmdArgs.push_back("-fopenmp-force-usm");
       break;
     default:
       // By default, if Clang doesn't know how to generate useful OpenMP code



More information about the cfe-commits mailing list