[llvm] 9efdca8 - [OpenMP] Introduce new flags to assert thread and team usage in the runtime

Joseph Huber via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 7 19:23:19 PDT 2021


Author: Joseph Huber
Date: 2021-10-07T22:23:09-04:00
New Revision: 9efdca87c78256bb00ed51521272dec2deed7f23

URL: https://github.com/llvm/llvm-project/commit/9efdca87c78256bb00ed51521272dec2deed7f23
DIFF: https://github.com/llvm/llvm-project/commit/9efdca87c78256bb00ed51521272dec2deed7f23.diff

LOG: [OpenMP] Introduce new flags to assert thread and team usage in the runtime

This patch adds two flags to be supported for the new runtime. The flags
are `-fopenmp-assume-threads-oversubscription` and
-fopenmp-assume-teams-oversubscription`. These add global values that
can be checked by the work sharing runtime functions to make better
judgements about how to distribute work between the threads.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D111348

Added: 
    clang/test/OpenMP/target_globals_codegen.cpp

Modified: 
    clang/include/clang/Basic/LangOptions.def
    clang/include/clang/Driver/Options.td
    clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
    clang/lib/Driver/ToolChains/Clang.cpp
    clang/lib/Frontend/CompilerInvocation.cpp
    llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
    llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Removed: 
    clang/test/OpenMP/target_debug_codegen.cpp


################################################################################
diff  --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index 598c847597108..e21d2fcea621e 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -244,6 +244,8 @@ LANGOPT(OpenMPCUDAReductionBufNum , 32, 1024, "Number of the reduction records i
 LANGOPT(OpenMPTargetNewRuntime , 1, 0, "Use the new bitcode library for OpenMP offloading")
 LANGOPT(OpenMPTargetDebug , 32, 0, "Enable debugging in the OpenMP offloading device RTL")
 LANGOPT(OpenMPOptimisticCollapse  , 1, 0, "Use at most 32 bits to represent the collapsed loop nest counter.")
+LANGOPT(OpenMPThreadSubscription  , 1, 0, "Assume work-shared loops do not have more iterations than participating threads.")
+LANGOPT(OpenMPTeamSubscription  , 1, 0, "Assume distributed loops do not have more iterations than participating teams.")
 LANGOPT(RenderScript      , 1, 0, "RenderScript")
 
 LANGOPT(CUDAIsDevice      , 1, 0, "compiling for CUDA device")

diff  --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 7d1e6d181f973..5a87f1276d1f3 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2427,6 +2427,14 @@ def fopenmp_target_debug : Flag<["-"], "fopenmp-target-debug">, Group<f_Group>,
   HelpText<"Enable debugging in the OpenMP offloading device RTL">;
 def fno_openmp_target_debug : Flag<["-"], "fno-openmp-target-debug">, Group<f_Group>, Flags<[NoArgumentUnused]>;
 def fopenmp_target_debug_EQ : Joined<["-"], "fopenmp-target-debug=">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
+def fopenmp_assume_teams_oversubscription : Flag<["-"], "fopenmp-assume-teams-oversubscription">, 
+  Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
+def fopenmp_assume_threads_oversubscription : Flag<["-"], "fopenmp-assume-threads-oversubscription">, 
+  Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
+def fno_openmp_assume_teams_oversubscription : Flag<["-"], "fno-openmp-assume-teams-oversubscription">, 
+  Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
+def fno_openmp_assume_threads_oversubscription : Flag<["-"], "fno-openmp-assume-threads-oversubscription">, 
+  Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
 defm openmp_target_new_runtime: BoolFOption<"openmp-target-new-runtime",
   LangOpts<"OpenMPTargetNewRuntime">, DefaultFalse,
   PosFlag<SetTrue, [CC1Option], "Use the new bitcode library for OpenMP offloading">,

diff  --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 16f1b0b00b095..be47ccbbbc58a 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -1200,8 +1200,14 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
     llvm_unreachable("OpenMP NVPTX can only handle device code.");
 
   llvm::OpenMPIRBuilder &OMPBuilder = getOMPBuilder();
-  if (CGM.getLangOpts().OpenMPTargetNewRuntime)
-    OMPBuilder.createDebugKind(CGM.getLangOpts().OpenMPTargetDebug);
+  if (CGM.getLangOpts().OpenMPTargetNewRuntime) {
+    OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTargetDebug,
+                                "__omp_rtl_debug_kind");
+    OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTeamSubscription,
+                                "__omp_rtl_assume_teams_oversubscription");
+    OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPThreadSubscription,
+                                "__omp_rtl_assume_threads_oversubscription");
+  }
 }
 
 void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF,

diff  --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 369c12aea5231..3f98914bd1904 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5815,6 +5815,17 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
                        options::OPT_fno_openmp_cuda_force_full_runtime,
                        /*Default=*/false))
         CmdArgs.push_back("-fopenmp-cuda-force-full-runtime");
+
+      // When in OpenMP offloading mode, forward assumptions information about
+      // thread and team counts in the device.
+      if (Args.hasFlag(options::OPT_fopenmp_assume_teams_oversubscription,
+                       options::OPT_fno_openmp_assume_teams_oversubscription,
+                       /*Default=*/false))
+        CmdArgs.push_back("-fopenmp-assume-teams-oversubscription");
+      if (Args.hasFlag(options::OPT_fopenmp_assume_threads_oversubscription,
+                       options::OPT_fno_openmp_assume_threads_oversubscription,
+                       /*Default=*/false))
+        CmdArgs.push_back("-fopenmp-assume-threads-oversubscription");
       break;
     default:
       // By default, if Clang doesn't know how to generate useful OpenMP code

diff  --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index e9eed36f36a96..34cbb7aab2666 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -3486,6 +3486,12 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts,
   if (Opts.OpenMPTargetNewRuntime)
     GenerateArg(Args, OPT_fopenmp_target_new_runtime, SA);
 
+  if (Opts.OpenMPThreadSubscription)
+    GenerateArg(Args, OPT_fopenmp_assume_threads_oversubscription, SA);
+
+  if (Opts.OpenMPTeamSubscription)
+    GenerateArg(Args, OPT_fopenmp_assume_teams_oversubscription, SA);
+
   if (Opts.OpenMPTargetDebug != 0)
     GenerateArg(Args, OPT_fopenmp_target_debug_EQ,
                 Twine(Opts.OpenMPTargetDebug), SA);
@@ -3928,6 +3934,13 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
     }
   }
 
+  if (Opts.OpenMPIsDevice && Opts.OpenMPTargetNewRuntime) {
+    if (Args.hasArg(OPT_fopenmp_assume_teams_oversubscription))
+      Opts.OpenMPTeamSubscription = true;
+    if (Args.hasArg(OPT_fopenmp_assume_threads_oversubscription))
+      Opts.OpenMPThreadSubscription = true;
+  }
+
   // Get the OpenMP target triples if any.
   if (Arg *A = Args.getLastArg(options::OPT_fopenmp_targets_EQ)) {
     enum ArchPtrSize { Arch16Bit, Arch32Bit, Arch64Bit };

diff  --git a/clang/test/OpenMP/target_debug_codegen.cpp b/clang/test/OpenMP/target_debug_codegen.cpp
deleted file mode 100644
index 5932e12a4d5df..0000000000000
--- a/clang/test/OpenMP/target_debug_codegen.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex "__omp_rtl_debug_kind"
-// Test target codegen - host bc file has to be created first.
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug=111 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-EQ
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-DEFAULT
-// expected-no-diagnostics
-
-#ifndef HEADER
-#define HEADER
-
-//.
-// CHECK: @__omp_rtl_debug_kind = weak_odr constant i32 1
-//.
-// CHECK-EQ: @__omp_rtl_debug_kind = weak_odr constant i32 111
-//.
-// CHECK-DEFAULT: @__omp_rtl_debug_kind = weak_odr constant i32 0
-//.
-void foo() {
-#pragma omp target
-  { }
-}
-
-#endif

diff  --git a/clang/test/OpenMP/target_globals_codegen.cpp b/clang/test/OpenMP/target_globals_codegen.cpp
new file mode 100644
index 0000000000000..1264266340729
--- /dev/null
+++ b/clang/test/OpenMP/target_globals_codegen.cpp
@@ -0,0 +1,40 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex "__omp_rtl_"
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug=111 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-EQ
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-DEFAULT
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-assume-threads-oversubscription -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-THREADS
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-assume-teams-oversubscription -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-TEAMS
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+//.
+// CHECK: @__omp_rtl_debug_kind = weak_odr constant i32 1
+// CHECK: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0
+// CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0
+//.
+// CHECK-EQ: @__omp_rtl_debug_kind = weak_odr constant i32 111
+// CHECK-EQ: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0
+// CHECK-EQ: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0
+//.
+// CHECK-DEFAULT: @__omp_rtl_debug_kind = weak_odr constant i32 0
+// CHECK-DEFAULT: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0
+// CHECK-DEFAULT: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0
+//.
+// CHECK-THREADS: @__omp_rtl_debug_kind = weak_odr constant i32 0
+// CHECK-THREADS: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0
+// CHECK-THREADS: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 1
+//.
+// CHECK-TEAMS: @__omp_rtl_debug_kind = weak_odr constant i32 0
+// CHECK-TEAMS: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 1
+// CHECK-TEAMS: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0
+//.
+void foo() {
+#pragma omp target
+  { }
+}
+
+#endif

diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 2496714bdfa0c..563e0eed17629 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -683,9 +683,8 @@ class OpenMPIRBuilder {
                           omp::IdentFlag Flags = omp::IdentFlag(0),
                           unsigned Reserve2Flags = 0);
 
-  /// Create a global value containing the \p DebugLevel to control debuggin in
-  /// the module.
-  GlobalValue *createDebugKind(unsigned DebugLevel);
+  /// Create a global flag \p Namein the module with initial value \p Value.
+  GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);
 
   /// Generate control flow and cleanup for cancellation.
   ///

diff  --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 36f6fa68ce242..20e36c8143704 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -245,12 +245,12 @@ OpenMPIRBuilder::~OpenMPIRBuilder() {
   assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
 }
 
-GlobalValue *OpenMPIRBuilder::createDebugKind(unsigned DebugKind) {
+GlobalValue *OpenMPIRBuilder::createGlobalFlag(unsigned Value, StringRef Name) {
   IntegerType *I32Ty = Type::getInt32Ty(M.getContext());
-  auto *GV = new GlobalVariable(
-      M, I32Ty,
-      /* isConstant = */ true, GlobalValue::WeakODRLinkage,
-      ConstantInt::get(I32Ty, DebugKind), "__omp_rtl_debug_kind");
+  auto *GV =
+      new GlobalVariable(M, I32Ty,
+                         /* isConstant = */ true, GlobalValue::WeakODRLinkage,
+                         ConstantInt::get(I32Ty, Value), Name);
 
   return GV;
 }


        


More information about the llvm-commits mailing list