[llvm] 5ccb742 - [OpenMP] Change OpenMPOpt to check openmp metadata

via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 25 13:34:56 PDT 2021


Author: Joseph Huber
Date: 2021-06-25T16:34:22-04:00
New Revision: 5ccb7424fab3bc8bf6cb2ced989cbd86ec8ccff6

URL: https://github.com/llvm/llvm-project/commit/5ccb7424fab3bc8bf6cb2ced989cbd86ec8ccff6
DIFF: https://github.com/llvm/llvm-project/commit/5ccb7424fab3bc8bf6cb2ced989cbd86ec8ccff6.diff

LOG: [OpenMP] Change OpenMPOpt to check openmp metadata

The metadata added in D102361 introduces a module flag that we can check
to determine if the module was compiled with `-fopenmp` enabled. We can
now check for the presence of this flag instead of scanning the call graph
for OpenMP runtime functions.

Depends on D102361

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D102423

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
    llvm/lib/Transforms/IPO/OpenMPOpt.cpp
    llvm/test/Transforms/OpenMP/add_attributes.ll
    llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll
    llvm/test/Transforms/OpenMP/deduplication.ll
    llvm/test/Transforms/OpenMP/deduplication_remarks.ll
    llvm/test/Transforms/OpenMP/globalization_remarks.ll
    llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll
    llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll
    llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll
    llvm/test/Transforms/OpenMP/icv_remarks.ll
    llvm/test/Transforms/OpenMP/icv_tracking.ll
    llvm/test/Transforms/OpenMP/parallel_deletion.ll
    llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll
    llvm/test/Transforms/OpenMP/parallel_region_merging.ll
    llvm/test/Transforms/OpenMP/remove_globalization.ll
    llvm/test/Transforms/OpenMP/replace_globalization.ll
    llvm/test/Transforms/OpenMP/rtf_type_checking.ll
    llvm/test/Transforms/OpenMP/single_threaded_execution.ll
    llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll
    llvm/test/Transforms/PhaseOrdering/openmp-opt-module.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h b/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
index 079bd714b46d8..c6ac5bd46f57a 100644
--- a/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
+++ b/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
@@ -20,60 +20,27 @@ namespace omp {
 /// Summary of a kernel (=entry point for target offloading).
 using Kernel = Function *;
 
-/// Helper to remember if the module contains OpenMP (runtime calls), to be used
-/// foremost with containsOpenMP.
-struct OpenMPInModule {
-  OpenMPInModule &operator=(bool Found) {
-    if (Found)
-      Value = OpenMPInModule::OpenMP::FOUND;
-    else
-      Value = OpenMPInModule::OpenMP::NOT_FOUND;
-    return *this;
-  }
-  bool isKnown() { return Value != OpenMP::UNKNOWN; }
-  operator bool() { return Value != OpenMP::NOT_FOUND; }
+/// Set of kernels in the module
+using KernelSet = SmallPtrSet<Kernel, 4>;
 
-  /// Does this function \p F contain any OpenMP runtime calls?
-  bool containsOMPRuntimeCalls(Function *F) const {
-    return FuncsWithOMPRuntimeCalls.contains(F);
-  }
+/// Helper to determine if \p M contains OpenMP.
+bool containsOpenMP(Module &M);
 
-  /// Return the known kernels (=GPU entry points) in the module.
-  SmallPtrSetImpl<Kernel> &getKernels() { return Kernels; }
+/// Helper to determine if \p M is a OpenMP target offloading device module.
+bool isOpenMPDevice(Module &M);
 
-  /// Identify kernels in the module and populate the Kernels set.
-  void identifyKernels(Module &M);
-
-private:
-  enum class OpenMP { FOUND, NOT_FOUND, UNKNOWN } Value = OpenMP::UNKNOWN;
-
-  friend bool containsOpenMP(Module &M, OpenMPInModule &OMPInModule);
-
-  /// In which functions are OpenMP runtime calls present?
-  SmallPtrSet<Function *, 32> FuncsWithOMPRuntimeCalls;
-
-  /// Collection of known kernels (=GPU entry points) in the module.
-  SmallPtrSet<Kernel, 8> Kernels;
-};
-
-/// Helper to determine if \p M contains OpenMP (runtime calls).
-bool containsOpenMP(Module &M, OpenMPInModule &OMPInModule);
+/// Get OpenMP device kernels in \p M.
+KernelSet getDeviceKernels(Module &M);
 
 } // namespace omp
 
 /// OpenMP optimizations pass.
 class OpenMPOptPass : public PassInfoMixin<OpenMPOptPass> {
-  /// Helper to remember if the module contains OpenMP (runtime calls).
-  omp::OpenMPInModule OMPInModule;
-
 public:
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
 };
 
 class OpenMPOptCGSCCPass : public PassInfoMixin<OpenMPOptCGSCCPass> {
-  /// Helper to remember if the module contains OpenMP (runtime calls).
-  omp::OpenMPInModule OMPInModule;
-
 public:
   PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
                         LazyCallGraph &CG, CGSCCUpdateResult &UR);

diff  --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 66901a69df183..32db92ee86463 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -1629,7 +1629,7 @@ struct OpenMPOpt {
     for (auto *F : SCC) {
       if (!F->isDeclaration())
         A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F));
-      if (!OMPInfoCache.Kernels.empty())
+      if (isOpenMPDevice(M))
         A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
     }
   }
@@ -2629,17 +2629,18 @@ AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP,
 }
 
 PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
-  if (!containsOpenMP(M, OMPInModule))
+  if (!containsOpenMP(M))
     return PreservedAnalyses::all();
-
   if (DisableOpenMPOptimizations)
     return PreservedAnalyses::all();
 
+  KernelSet Kernels = getDeviceKernels(M);
+
   // Create internal copies of each function if this is a kernel Module.
   DenseSet<const Function *> InternalizedFuncs;
-  if (!OMPInModule.getKernels().empty())
+  if (isOpenMPDevice(M))
     for (Function &F : M)
-      if (!F.isDeclaration() && !OMPInModule.getKernels().contains(&F))
+      if (!F.isDeclaration() && !Kernels.contains(&F))
         if (Attributor::internalizeFunction(F, /* Force */ true))
           InternalizedFuncs.insert(&F);
 
@@ -2665,10 +2666,9 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
   CallGraphUpdater CGUpdater;
 
   SetVector<Function *> Functions(SCC.begin(), SCC.end());
-  OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions,
-                                OMPInModule.getKernels());
+  OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels);
 
-  unsigned MaxFixponitIterations = (!OMPInModule.getKernels().empty()) ? 64 : 32;
+  unsigned MaxFixponitIterations = (Kernels.empty()) ? 64 : 32;
   Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false, MaxFixponitIterations, OREGetter,
                DEBUG_TYPE);
 
@@ -2684,30 +2684,25 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
                                           CGSCCAnalysisManager &AM,
                                           LazyCallGraph &CG,
                                           CGSCCUpdateResult &UR) {
-  if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule))
+  if (!containsOpenMP(*C.begin()->getFunction().getParent()))
     return PreservedAnalyses::all();
-
   if (DisableOpenMPOptimizations)
     return PreservedAnalyses::all();
 
   SmallVector<Function *, 16> SCC;
   // If there are kernels in the module, we have to run on all SCC's.
-  bool SCCIsInteresting = !OMPInModule.getKernels().empty();
   for (LazyCallGraph::Node &N : C) {
     Function *Fn = &N.getFunction();
     SCC.push_back(Fn);
-
-    // Do we already know that the SCC contains kernels,
-    // or that OpenMP functions are called from this SCC?
-    if (SCCIsInteresting)
-      continue;
-    // If not, let's check that.
-    SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn);
   }
 
-  if (!SCCIsInteresting || SCC.empty())
+  if (SCC.empty())
     return PreservedAnalyses::all();
 
+  Module &M = *C.begin()->getFunction().getParent();
+
+  KernelSet Kernels = getDeviceKernels(M);
+
   FunctionAnalysisManager &FAM =
       AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
 
@@ -2723,9 +2718,9 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
 
   SetVector<Function *> Functions(SCC.begin(), SCC.end());
   OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
-                                /*CGSCC*/ Functions, OMPInModule.getKernels());
+                                /*CGSCC*/ Functions, Kernels);
 
-  unsigned MaxFixponitIterations = (!OMPInModule.getKernels().empty()) ? 64 : 32;
+  unsigned MaxFixponitIterations = (isOpenMPDevice(M)) ? 64 : 32;
   Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, MaxFixponitIterations, OREGetter,
                DEBUG_TYPE);
 
@@ -2741,7 +2736,6 @@ namespace {
 
 struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
   CallGraphUpdater CGUpdater;
-  OpenMPInModule OMPInModule;
   static char ID;
 
   OpenMPOptCGSCCLegacyPass() : CallGraphSCCPass(ID) {
@@ -2752,38 +2746,27 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
     CallGraphSCCPass::getAnalysisUsage(AU);
   }
 
-  bool doInitialization(CallGraph &CG) override {
-    // Disable the pass if there is no OpenMP (runtime call) in the module.
-    containsOpenMP(CG.getModule(), OMPInModule);
-    return false;
-  }
-
   bool runOnSCC(CallGraphSCC &CGSCC) override {
-    if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule))
+    if (!containsOpenMP(CGSCC.getCallGraph().getModule()))
       return false;
     if (DisableOpenMPOptimizations || skipSCC(CGSCC))
       return false;
 
     SmallVector<Function *, 16> SCC;
     // If there are kernels in the module, we have to run on all SCC's.
-    bool SCCIsInteresting = !OMPInModule.getKernels().empty();
     for (CallGraphNode *CGN : CGSCC) {
       Function *Fn = CGN->getFunction();
       if (!Fn || Fn->isDeclaration())
         continue;
       SCC.push_back(Fn);
-
-      // Do we already know that the SCC contains kernels,
-      // or that OpenMP functions are called from this SCC?
-      if (SCCIsInteresting)
-        continue;
-      // If not, let's check that.
-      SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn);
     }
 
-    if (!SCCIsInteresting || SCC.empty())
+    if (SCC.empty())
       return false;
 
+    Module &M = CGSCC.getCallGraph().getModule();
+    KernelSet Kernels = getDeviceKernels(M);
+
     CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
     CGUpdater.initialize(CG, CGSCC);
 
@@ -2799,11 +2782,11 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
     AnalysisGetter AG;
     SetVector<Function *> Functions(SCC.begin(), SCC.end());
     BumpPtrAllocator Allocator;
-    OMPInformationCache InfoCache(
-        *(Functions.back()->getParent()), AG, Allocator,
-        /*CGSCC*/ Functions, OMPInModule.getKernels());
+    OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG,
+                                  Allocator,
+                                  /*CGSCC*/ Functions, Kernels);
 
-    unsigned MaxFixponitIterations = (!OMPInModule.getKernels().empty()) ? 64 : 32;
+    unsigned MaxFixponitIterations = (isOpenMPDevice(M)) ? 64 : 32;
     Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
                  MaxFixponitIterations, OREGetter, DEBUG_TYPE);
 
@@ -2816,11 +2799,13 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
 
 } // end anonymous namespace
 
-void OpenMPInModule::identifyKernels(Module &M) {
-
+KernelSet llvm::omp::getDeviceKernels(Module &M) {
+  // TODO: Create a more cross-platform way of determining device kernels.
   NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
+  KernelSet Kernels;
+
   if (!MD)
-    return;
+    return Kernels;
 
   for (auto *Op : MD->operands()) {
     if (Op->getNumOperands() < 2)
@@ -2838,38 +2823,24 @@ void OpenMPInModule::identifyKernels(Module &M) {
 
     Kernels.insert(KernelFn);
   }
-}
 
-bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) {
-  if (OMPInModule.isKnown())
-    return OMPInModule;
+  return Kernels;
+}
 
-  auto RecordFunctionsContainingUsesOf = [&](Function *F) {
-    for (User *U : F->users())
-      if (auto *I = dyn_cast<Instruction>(U))
-        OMPInModule.FuncsWithOMPRuntimeCalls.insert(I->getFunction());
-  };
+bool llvm::omp::containsOpenMP(Module &M) {
+  Metadata *MD = M.getModuleFlag("openmp");
+  if (!MD)
+    return false;
 
-  // MSVC doesn't like long if-else chains for some reason and instead just
-  // issues an error. Work around it..
-  do {
-#define OMP_RTL(_Enum, _Name, ...)                                             \
-  if (Function *F = M.getFunction(_Name)) {                                    \
-    RecordFunctionsContainingUsesOf(F);                                        \
-    OMPInModule = true;                                                        \
-  }
-#include "llvm/Frontend/OpenMP/OMPKinds.def"
-  } while (false);
+  return true;
+}
 
-  // Identify kernels once. TODO: We should split the OMPInformationCache into a
-  // module and an SCC part. The kernel information, among other things, could
-  // go into the module part.
-  if (OMPInModule.isKnown() && OMPInModule) {
-    OMPInModule.identifyKernels(M);
-    return true;
-  }
+bool llvm::omp::isOpenMPDevice(Module &M) {
+  Metadata *MD = M.getModuleFlag("openmp-device");
+  if (!MD)
+    return false;
 
-  return OMPInModule = false;
+  return true;
 }
 
 char OpenMPOptCGSCCLegacyPass::ID = 0;

diff  --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll
index 454dfeb43273c..d0edeb0b7f5f6 100644
--- a/llvm/test/Transforms/OpenMP/add_attributes.ll
+++ b/llvm/test/Transforms/OpenMP/add_attributes.ll
@@ -1739,3 +1739,6 @@ attributes #0 = { noinline }
 ; OPTIMISTIC: ; Function Attrs: convergent noinline nounwind 
 ; OPTIMISTIC-NEXT: declare void @__kmpc_barrier_simple_spmd(%struct.ident_t* nocapture nofree readonly, i32)
 
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 7, !"openmp", i32 50}

diff  --git a/llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll b/llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll
index 5b40feb1470c2..93214dfd09790 100644
--- a/llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll
+++ b/llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll
@@ -26,3 +26,7 @@ declare void @__kmpc_syncwarp(i64)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare void @__kmpc_syncwarp(i64)
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 7, !"openmp", i32 50}

diff  --git a/llvm/test/Transforms/OpenMP/deduplication.ll b/llvm/test/Transforms/OpenMP/deduplication.ll
index 5c86d43c56c44..77da9284748f6 100644
--- a/llvm/test/Transforms/OpenMP/deduplication.ll
+++ b/llvm/test/Transforms/OpenMP/deduplication.ll
@@ -221,3 +221,7 @@ entry:
   call void @useI32(i32 %tid5)
   ret void
 }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 7, !"openmp", i32 50}

diff  --git a/llvm/test/Transforms/OpenMP/deduplication_remarks.ll b/llvm/test/Transforms/OpenMP/deduplication_remarks.ll
index 45438d572f452..2104c5a848d56 100644
--- a/llvm/test/Transforms/OpenMP/deduplication_remarks.ll
+++ b/llvm/test/Transforms/OpenMP/deduplication_remarks.ll
@@ -30,7 +30,7 @@ declare !dbg !4 void @useI32(i32) local_unnamed_addr
 declare void @llvm.dbg.value(metadata, metadata, metadata)
 
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!8, !9, !10, !11, !12}
+!llvm.module.flags = !{!8, !9, !10, !11, !12, !29}
 !llvm.ident = !{!13}
 
 !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, splitDebugInlining: false, nameTableKind: None)
@@ -62,3 +62,4 @@ declare void @llvm.dbg.value(metadata, metadata, metadata)
 !26 = !DILocation(line: 9, column: 10, scope: !14)
 !27 = !DILocation(line: 10, column: 2, scope: !14)
 !28 = !DILocation(line: 13, column: 1, scope: !14)
+!29 = !{i32 7, !"openmp", i32 50}

diff  --git a/llvm/test/Transforms/OpenMP/globalization_remarks.ll b/llvm/test/Transforms/OpenMP/globalization_remarks.ll
index f1db90d0d3425..a57ae974abb25 100644
--- a/llvm/test/Transforms/OpenMP/globalization_remarks.ll
+++ b/llvm/test/Transforms/OpenMP/globalization_remarks.ll
@@ -4,13 +4,14 @@ source_filename = "declare_target_codegen_globalization.cpp"
 target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64"
 
+; CHECK: remark: globalization_remarks.c:5:7: Could not move globalized variable to the stack. Variable is potentially captured.
 ; CHECK: remark: globalization_remarks.c:5:7: Found thread data sharing on the GPU. Expect degraded performance due to data globalization.
 
 @S = external local_unnamed_addr global i8*
 
 define void @foo() {
 entry:
-  %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !8
+  %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !10
   %x_on_stack = bitcast i8* %0 to i32*
   %1 = bitcast i32* %x_on_stack to i8*
   call void @share(i8* %1)
@@ -30,13 +31,17 @@ declare void @__kmpc_free_shared(i8*)
 
 
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!3, !4}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!nvvm.annotations = !{!7, !8}
 
 !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
 !1 = !DIFile(filename: "globalization_remarks.c", directory: "/tmp/globalization_remarks.c")
 !2 = !{}
 !3 = !{i32 2, !"Debug Info Version", i32 3}
 !4 = !{i32 1, !"wchar_size", i32 4}
-!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !7, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
-!7 = !DISubroutineType(types: !2)
-!8 = !DILocation(line: 5, column: 7, scope: !6)
+!5 = !{i32 7, !"openmp", i32 50}
+!6 = !{i32 7, !"openmp-device", i32 50}
+!7 = !{void ()* @foo, !"kernel", i32 1}
+!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!9 = !DISubroutineType(types: !2)
+!10 = !DILocation(line: 5, column: 7, scope: !8)

diff  --git a/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll b/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll
index 27413df87f981..5cbd37e7d67de 100644
--- a/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll
+++ b/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll
@@ -19,9 +19,11 @@ define void @non_kernel() {
 ; Needed to trigger the openmp-opt pass
 declare dso_local void @__kmpc_kernel_prepare_parallel(i8*)
 
+!llvm.module.flags = !{!4}
 !nvvm.annotations = !{!2, !0, !1, !3, !1, !2}
 
 !0 = !{void ()* @kernel1, !"kernel", i32 1}
 !1 = !{void ()* @non_kernel, !"non_kernel", i32 1}
 !2 = !{null, !"align", i32 1}
 !3 = !{void ()* @kernel2, !"kernel", i32 1}
+!4 = !{i32 7, !"openmp", i32 50}

diff  --git a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll
index 6c1b0c89597fe..2ef8d760bf3a3 100644
--- a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll
+++ b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll
@@ -275,5 +275,8 @@ declare void @__kmpc_kernel_end_parallel()
 
 
 !nvvm.annotations = !{!1}
+!llvm.module.flags = !{!2, !3}
 
 !1 = !{void ()* @__omp_offloading_50_6dfa0f01_foo_l6, !"kernel", i32 1}
+!2 = !{i32 7, !"openmp", i32 50}
+!3 = !{i32 7, !"openmp-device", i32 50}

diff  --git a/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll b/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll
index dc8b57a20da3d..e75a7aa22d656 100644
--- a/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll
+++ b/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll
@@ -522,3 +522,7 @@ declare dso_local i32 @rand(...)
 
 ; CHECK: declare void @__tgt_target_data_begin_mapper_issue(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**, %struct.__tgt_async_info*)
 ; CHECK: declare void @__tgt_target_data_begin_mapper_wait(i64, %struct.__tgt_async_info*)
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 7, !"openmp", i32 50}

diff  --git a/llvm/test/Transforms/OpenMP/icv_remarks.ll b/llvm/test/Transforms/OpenMP/icv_remarks.ll
index a03fe3046861a..ac655bf6f68cf 100644
--- a/llvm/test/Transforms/OpenMP/icv_remarks.ll
+++ b/llvm/test/Transforms/OpenMP/icv_remarks.ll
@@ -67,7 +67,7 @@ attributes #4 = { argmemonly nounwind willreturn }
 attributes #5 = { nounwind readnone speculatable willreturn }
 
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!13, !14, !15}
+!llvm.module.flags = !{!13, !14, !15, !59}
 !llvm.ident = !{!16}
 
 !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 11.0.0 (https://github.com/llvm/llvm-project.git 73cea83a6f5ab521edf3cccfc603534776d691ec)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, splitDebugInlining: false, nameTableKind: None)
@@ -129,3 +129,4 @@ attributes #5 = { nounwind readnone speculatable willreturn }
 !56 = !DILocation(line: 18, column: 1, scope: !33)
 !57 = !{!58}
 !58 = !{i64 2, i64 -1, i64 -1, i1 true}
+!59 = !{i32 7, !"openmp", i32 50}

diff  --git a/llvm/test/Transforms/OpenMP/icv_tracking.ll b/llvm/test/Transforms/OpenMP/icv_tracking.ll
index 8952bedf66efe..e19c952d1995e 100644
--- a/llvm/test/Transforms/OpenMP/icv_tracking.ll
+++ b/llvm/test/Transforms/OpenMP/icv_tracking.ll
@@ -675,5 +675,8 @@ define i32 @test6(i32 %0) {
 
 declare i32 @__gxx_personality_v0(...)
 
+!llvm.module.flags = !{!2}
+
 !0 = !{!1}
 !1 = !{i64 2, i64 -1, i64 -1, i1 true}
+!2 = !{i32 7, !"openmp", i32 50}

diff  --git a/llvm/test/Transforms/OpenMP/parallel_deletion.ll b/llvm/test/Transforms/OpenMP/parallel_deletion.ll
index d1629b8a86f1b..7082cc79dfd00 100644
--- a/llvm/test/Transforms/OpenMP/parallel_deletion.ll
+++ b/llvm/test/Transforms/OpenMP/parallel_deletion.ll
@@ -739,6 +739,8 @@ declare void @readonly() readonly
 
 declare void @readnone() readnone
 
+!llvm.module.flags = !{!8}
+
 !0 = !{i32 1, !"wchar_size", i32 4}
 !1 = !{!"clang"}
 !2 = !{!3}
@@ -747,3 +749,4 @@ declare void @readnone() readnone
 !5 = !{!"int", !6, i64 0}
 !6 = !{!"omnipotent char", !7, i64 0}
 !7 = !{!"Simple C/C++ TBAA"}
+!8 = !{i32 7, !"openmp", i32 50}

diff  --git a/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll b/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll
index 49041200f249e..ae81f12fcfed4 100644
--- a/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll
+++ b/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll
@@ -69,7 +69,7 @@ attributes #1 = { readonly willreturn }
 attributes #2 = { readnone willreturn }
 
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!9, !10, !11, !12, !13}
+!llvm.module.flags = !{!9, !10, !11, !12, !13, !52}
 !llvm.ident = !{!14}
 
 !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, splitDebugInlining: false, nameTableKind: None)
@@ -124,3 +124,4 @@ attributes #2 = { readnone willreturn }
 !49 = !DILocalVariable(name: ".global_tid.", arg: 1, scope: !47, type: !28, flags: DIFlagArtificial)
 !50 = !DILocalVariable(name: ".bound_tid.", arg: 2, scope: !47, type: !28, flags: DIFlagArtificial)
 !51 = !DILocation(line: 15, column: 2, scope: !47)
+!52 = !{i32 7, !"openmp", i32 50}

diff  --git a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
index 4317c7fdcab16..e9ebe61e58186 100644
--- a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
+++ b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
@@ -782,11 +782,12 @@ entry:
 }
 
 
-!llvm.module.flags = !{!0}
+!llvm.module.flags = !{!0, !3}
 
 !0 = !{i32 1, !"wchar_size", i32 4}
 !1 = !{!2}
 !2 = !{i64 2, i64 -1, i64 -1, i1 true}
+!3 = !{i32 7, !"openmp", i32 50}
 ; CHECK-LABEL: define {{[^@]+}}@merge
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
@@ -6908,3 +6909,4 @@ entry:
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
+

diff  --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll
index 18a7bb126331a..2edf5d3c219d2 100644
--- a/llvm/test/Transforms/OpenMP/remove_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll
@@ -30,7 +30,7 @@ define internal void @foo() {
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !9
+  %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !11
   call void @use(i8* %0)
   call void @__kmpc_free_shared(i8* %0)
   ret void
@@ -46,7 +46,7 @@ define internal void @bar() {
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !10
+  %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !12
   call void @share(i8* %0)
   call void @__kmpc_free_shared(i8* %0)
   ret void
@@ -76,7 +76,7 @@ declare i8* @__kmpc_alloc_shared(i64)
 declare void @__kmpc_free_shared(i8*)
 
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!3, !4}
+!llvm.module.flags = !{!3, !4, !6, !7}
 !nvvm.annotations = !{!5}
 
 !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
@@ -85,8 +85,10 @@ declare void @__kmpc_free_shared(i8*)
 !3 = !{i32 2, !"Debug Info Version", i32 3}
 !4 = !{i32 1, !"wchar_size", i32 4}
 !5 = !{void ()* @kernel, !"kernel", i32 1}
-!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
-!7 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
-!8 = !DISubroutineType(types: !2)
-!9 = !DILocation(line: 2, column: 2, scope: !6)
-!10 = !DILocation(line: 4, column: 2, scope: !7)
+!6 = !{i32 7, !"openmp", i32 50}
+!7 = !{i32 7, !"openmp-device", i32 50}
+!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!9 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!10 = !DISubroutineType(types: !2)
+!11 = !DILocation(line: 2, column: 2, scope: !8)
+!12 = !DILocation(line: 4, column: 2, scope: !9)

diff  --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll
index d195ecefe9e91..c0021bda65a6e 100644
--- a/llvm/test/Transforms/OpenMP/replace_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll
@@ -35,7 +35,7 @@ entry:
   %cmp = icmp eq i32 %tid, 0
   br i1 %cmp, label %master, label %exit
 master:
-  %x = call i8* @__kmpc_alloc_shared(i64 16), !dbg !9
+  %x = call i8* @__kmpc_alloc_shared(i64 16), !dbg !11
   %x_on_stack = bitcast i8* %x to [4 x i32]*
   %0 = bitcast [4 x i32]* %x_on_stack to i8*
   call void @use(i8* %0)
@@ -58,7 +58,7 @@ entry:
   %3 = icmp eq i32 %tid, %master_tid
   br i1 %3, label %master, label %exit
 master:
-  %y = call i8* @__kmpc_alloc_shared(i64 4), !dbg !10
+  %y = call i8* @__kmpc_alloc_shared(i64 4), !dbg !12
   %y_on_stack = bitcast i8* %y to [4 x i32]*
   %4 = bitcast [4 x i32]* %y_on_stack to i8*
   call void @use(i8* %4)
@@ -87,18 +87,19 @@ declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
 
 
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!3, !4}
-!nvvm.annotations = !{!5, !6}
-
+!llvm.module.flags = !{!3, !4, !5, !6}
+!nvvm.annotations = !{!7, !8}
 
 !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
 !1 = !DIFile(filename: "replace_globalization.c", directory: "/tmp/replace_globalization.c")
 !2 = !{}
 !3 = !{i32 2, !"Debug Info Version", i32 3}
 !4 = !{i32 1, !"wchar_size", i32 4}
-!5 = !{void ()* @foo, !"kernel", i32 1}
-!6 = !{void ()* @bar, !"kernel", i32 1}
-!7 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
-!8 = !DISubroutineType(types: !2)
-!9 = !DILocation(line: 5, column: 7, scope: !7)
-!10 = !DILocation(line: 5, column: 14, scope: !7)
+!5 = !{i32 7, !"openmp", i32 50}
+!6 = !{i32 7, !"openmp-device", i32 50}
+!7 = !{void ()* @foo, !"kernel", i32 1}
+!8 = !{void ()* @bar, !"kernel", i32 1}
+!9 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!10 = !DISubroutineType(types: !2)
+!11 = !DILocation(line: 5, column: 7, scope: !9)
+!12 = !DILocation(line: 5, column: 14, scope: !9)

diff  --git a/llvm/test/Transforms/OpenMP/rtf_type_checking.ll b/llvm/test/Transforms/OpenMP/rtf_type_checking.ll
index ae671ca057677..3ac4e04a752b6 100644
--- a/llvm/test/Transforms/OpenMP/rtf_type_checking.ll
+++ b/llvm/test/Transforms/OpenMP/rtf_type_checking.ll
@@ -49,13 +49,14 @@ declare void @__kmpc_flush(%struct.ident_t*)
 ; Different return type.
 declare void @omp_get_thread_num()
 
-!llvm.module.flags = !{!0}
+!llvm.module.flags = !{!0, !4}
 !llvm.ident = !{!1}
 
 !0 = !{i32 1, !"wchar_size", i32 4}
 !1 = !{!"clang"}
 !2 = !{!3}
 !3 = !{i64 2, i64 -1, i64 -1, i1 true}
+!4 = !{i32 7, !"openmp", i32 50}
 
 ; NPM: Running pass: OpenMPOptCGSCCPass on (.omp_outlined.)
 ; NPM-NOT: Running pass: OpenMPOptCGSCCPass on (.omp_outlined.)

diff  --git a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll
index 327905efd559d..b3d535bd510c9 100644
--- a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll
+++ b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll
@@ -68,8 +68,8 @@ declare i32 @llvm.amdgcn.workitem.id.x()
 declare void @__kmpc_kernel_init(i32, i16)
 
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!3, !4}
-!nvvm.annotations = !{!5}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!nvvm.annotations = !{!7}
 
 
 !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
@@ -77,4 +77,6 @@ declare void @__kmpc_kernel_init(i32, i16)
 !2 = !{}
 !3 = !{i32 2, !"Debug Info Version", i32 3}
 !4 = !{i32 1, !"wchar_size", i32 4}
-!5 = !{void ()* @kernel, !"kernel", i32 1}
+!5 = !{i32 7, !"openmp", i32 50}
+!6 = !{i32 7, !"openmp-device", i32 50}
+!7 = !{void ()* @kernel, !"kernel", i32 1}

diff  --git a/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll b/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll
index 488a82296dfc4..2f317fb30e864 100644
--- a/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll
+++ b/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll
@@ -70,3 +70,8 @@ declare void @__tgt_target_data_begin_mapper(%struct.ident_t*, i64, i32, i8**, i
 declare void @__tgt_target_data_end_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
 
 declare dso_local i32 @rand(...)
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 7, !"openmp", i32 50}
+

diff  --git a/llvm/test/Transforms/PhaseOrdering/openmp-opt-module.ll b/llvm/test/Transforms/PhaseOrdering/openmp-opt-module.ll
index e3bf6f296b234..467321d2e64d9 100644
--- a/llvm/test/Transforms/PhaseOrdering/openmp-opt-module.ll
+++ b/llvm/test/Transforms/PhaseOrdering/openmp-opt-module.ll
@@ -9,7 +9,7 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
 
 define void @foo() {
 entry:
-  %x = call i8* @__kmpc_alloc_shared(i64 4), !dbg !7
+  %x = call i8* @__kmpc_alloc_shared(i64 4), !dbg !10
   %x_on_stack = bitcast i8* %x to i32*
   %0 = bitcast i32* %x_on_stack to i8*
   call void @use(i8* %0)
@@ -36,13 +36,17 @@ declare i8* @_Z10SafeMallocmPKc(i64 %size, i8* nocapture readnone %msg)
 declare void @__kmpc_free_shared(i8*)
 
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!3, !4}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!nvvm.annotations = !{!7}
 
 !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
 !1 = !DIFile(filename: "openmp_opt_module.c", directory: "/tmp/openmp_opt_module.c")
 !2 = !{}
 !3 = !{i32 2, !"Debug Info Version", i32 3}
 !4 = !{i32 1, !"wchar_size", i32 4}
-!5 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !6, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
-!6 = !DISubroutineType(types: !2)
-!7 = !DILocation(line: 5, column: 7, scope: !5)
+!5 = !{i32 7, !"openmp", i32 50}
+!6 = !{i32 7, !"openmp-device", i32 50}
+!7 = !{void ()* @foo, !"kernel", i32 1}
+!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!9 = !DISubroutineType(types: !2)
+!10 = !DILocation(line: 5, column: 7, scope: !8)


        


More information about the llvm-commits mailing list