[llvm] 9548b74 - [OpenMP] Introduce the OpenMPOpt transformation pass

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Sat Feb 8 12:48:07 PST 2020


Author: Johannes Doerfert
Date: 2020-02-08T14:47:03-06:00
New Revision: 9548b74a831ea005649465797f359e0521f3b8a9

URL: https://github.com/llvm/llvm-project/commit/9548b74a831ea005649465797f359e0521f3b8a9
DIFF: https://github.com/llvm/llvm-project/commit/9548b74a831ea005649465797f359e0521f3b8a9.diff

LOG: [OpenMP] Introduce the OpenMPOpt transformation pass

The OpenMPOpt pass is a CGSCC pass in which OpenMP specific
optimizations can reside.

The OpenMPOpt pass uses the OMPKinds.def file to identify runtime
calls and their uses. This allows targeted transformations and eases
their implementation.

This initial patch deduplicates `__kmpc_global_thread_num` and
`omp_get_thread_num` calls. We can also identify arguments that are
equivalent to such a call result and use it instead. Later we can
determine "gtid" arguments based on the use in kernel functions etc.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D69930

Added: 
    llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
    llvm/lib/Transforms/IPO/OpenMPOpt.cpp
    llvm/test/Transforms/OpenMP/gtid.ll

Modified: 
    llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
    llvm/include/llvm/InitializePasses.h
    llvm/include/llvm/LinkAllPasses.h
    llvm/include/llvm/Transforms/IPO.h
    llvm/lib/LTO/LTOCodeGenerator.cpp
    llvm/lib/Passes/PassBuilder.cpp
    llvm/lib/Passes/PassRegistry.def
    llvm/lib/Transforms/IPO/CMakeLists.txt
    llvm/lib/Transforms/IPO/IPO.cpp
    llvm/lib/Transforms/IPO/LLVMBuild.txt
    llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
    llvm/test/Other/new-pm-defaults.ll
    llvm/test/Other/new-pm-thinlto-defaults.ll
    llvm/test/Other/opt-O2-pipeline.ll
    llvm/test/Other/opt-O3-pipeline.ll
    llvm/test/Other/opt-Os-pipeline.ll
    llvm/test/Other/pass-pipelines.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 7fc2dbf14664..8e016760dd67 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -177,6 +177,8 @@ __OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32)
 
 __OMP_RTL(omp_get_thread_num, false, Int32, )
 
+__OMP_RTL(__last, false, Void, )
+
 #undef __OMP_RTL
 #undef OMP_RTL
 

diff  --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index a5e1310e28b9..a8245d8823cd 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -71,6 +71,7 @@ void initializeAggressiveInstCombinerLegacyPassPass(PassRegistry&);
 void initializeAliasSetPrinterPass(PassRegistry&);
 void initializeAlignmentFromAssumptionsPass(PassRegistry&);
 void initializeAlwaysInlinerLegacyPassPass(PassRegistry&);
+void initializeOpenMPOptLegacyPassPass(PassRegistry &);
 void initializeArgPromotionPass(PassRegistry&);
 void initializeAssumptionCacheTrackerPass(PassRegistry&);
 void initializeAtomicExpandPass(PassRegistry&);

diff  --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index aa64296f9428..5b3bf3e66a43 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -71,6 +71,7 @@ namespace {
       (void) llvm::createAggressiveDCEPass();
       (void) llvm::createAggressiveInstCombinerPass();
       (void) llvm::createBitTrackingDCEPass();
+      (void) llvm::createOpenMPOptLegacyPass();
       (void) llvm::createArgumentPromotionPass();
       (void) llvm::createAlignmentFromAssumptionsPass();
       (void) llvm::createBasicAAWrapperPass();

diff  --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h
index ee411a10740a..16ce81400019 100644
--- a/llvm/include/llvm/Transforms/IPO.h
+++ b/llvm/include/llvm/Transforms/IPO.h
@@ -152,6 +152,10 @@ ModulePass *createDeadArgHackingPass();
 ///
 Pass *createArgumentPromotionPass(unsigned maxElements = 3);
 
+//===----------------------------------------------------------------------===//
+/// createOpenMPOptLegacyPass - OpenMP specific optimizations.
+Pass *createOpenMPOptLegacyPass();
+
 //===----------------------------------------------------------------------===//
 /// createIPConstantPropagationPass - This pass propagates constants from call
 /// sites into the bodies of functions.

diff  --git a/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h b/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
new file mode 100644
index 000000000000..0bd81ea8f543
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
@@ -0,0 +1,54 @@
+//===- IPO/OpenMPOpt.h - Collection of OpenMP optimizations -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_OPENMP_OPT_H
+#define LLVM_TRANSFORMS_IPO_OPENMP_OPT_H
+
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+namespace omp {
+
+/// Helper to remember if the module contains OpenMP (runtime calls), to be used
+/// foremost with containsOpenMP.
+struct OpenMPInModule {
+  OpenMPInModule &operator=(bool Found) {
+    if (Found)
+      Value = OpenMPInModule::OpenMP::FOUND;
+    else
+      Value = OpenMPInModule::OpenMP::NOT_FOUND;
+    return *this;
+  }
+  bool isKnown() { return Value != OpenMP::UNKNOWN; }
+  operator bool() { return Value != OpenMP::NOT_FOUND; }
+
+private:
+  enum class OpenMP { FOUND, NOT_FOUND, UNKNOWN } Value = OpenMP::UNKNOWN;
+};
+
+/// Helper to determine if \p M contains OpenMP (runtime calls).
+bool containsOpenMP(Module &M, OpenMPInModule &OMPInModule);
+
+} // namespace omp
+
+/// OpenMP optimizations pass.
+class OpenMPOptPass : public PassInfoMixin<OpenMPOptPass> {
+  /// Helper to remember if the module contains OpenMP (runtime calls).
+  omp::OpenMPInModule OMPInModule;
+
+public:
+  PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
+                        LazyCallGraph &CG, CGSCCUpdateResult &UR);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_OPENMP_OPT_H

diff  --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp
index a79a52311c27..88f433a9e035 100644
--- a/llvm/lib/LTO/LTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -134,6 +134,7 @@ void LTOCodeGenerator::initializeLTOPasses() {
   initializeSimpleInlinerPass(R);
   initializePruneEHPass(R);
   initializeGlobalDCELegacyPassPass(R);
+  initializeOpenMPOptLegacyPassPass(R);
   initializeArgPromotionPass(R);
   initializeJumpThreadingPass(R);
   initializeSROALegacyPassPass(R);

diff  --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 8dc14680e6ba..2ab0445bc6de 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -87,6 +87,7 @@
 #include "llvm/Transforms/IPO/Internalize.h"
 #include "llvm/Transforms/IPO/LowerTypeTests.h"
 #include "llvm/Transforms/IPO/MergeFunctions.h"
+#include "llvm/Transforms/IPO/OpenMPOpt.h"
 #include "llvm/Transforms/IPO/PartialInlining.h"
 #include "llvm/Transforms/IPO/SCCP.h"
 #include "llvm/Transforms/IPO/SampleProfile.h"
@@ -837,6 +838,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
   if (Level == OptimizationLevel::O3)
     MainCGPipeline.addPass(ArgumentPromotionPass());
 
+  // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
+  // there are no OpenMP runtime calls present in the module.
+  if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
+    MainCGPipeline.addPass(OpenMPOptPass());
+
   // Lastly, add the core function simplification pipeline nested inside the
   // CGSCC walk.
   MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(

diff  --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 6bffe1a5b5c7..83ed93dd4718 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -109,6 +109,7 @@ CGSCC_PASS("argpromotion", ArgumentPromotionPass())
 CGSCC_PASS("invalidate<all>", InvalidateAllAnalysesPass())
 CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass())
 CGSCC_PASS("inline", InlinerPass())
+CGSCC_PASS("openmpopt", OpenMPOptPass())
 CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass())
 #undef CGSCC_PASS
 

diff  --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt
index 375d2fb3063b..034450f440ba 100644
--- a/llvm/lib/Transforms/IPO/CMakeLists.txt
+++ b/llvm/lib/Transforms/IPO/CMakeLists.txt
@@ -26,6 +26,7 @@ add_llvm_component_library(LLVMipo
   LoopExtractor.cpp
   LowerTypeTests.cpp
   MergeFunctions.cpp
+  OpenMPOpt.cpp
   PartialInlining.cpp
   PassManagerBuilder.cpp
   PruneEH.cpp

diff  --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp
index 8a15800cbdb5..3b6038e83071 100644
--- a/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/llvm/lib/Transforms/IPO/IPO.cpp
@@ -23,6 +23,7 @@
 using namespace llvm;
 
 void llvm::initializeIPO(PassRegistry &Registry) {
+  initializeOpenMPOptLegacyPassPass(Registry);
   initializeArgPromotionPass(Registry);
   initializeCalledValuePropagationLegacyPassPass(Registry);
   initializeConstantMergeLegacyPassPass(Registry);

diff  --git a/llvm/lib/Transforms/IPO/LLVMBuild.txt b/llvm/lib/Transforms/IPO/LLVMBuild.txt
index 14aa4e4ba372..ea207e81fce8 100644
--- a/llvm/lib/Transforms/IPO/LLVMBuild.txt
+++ b/llvm/lib/Transforms/IPO/LLVMBuild.txt
@@ -19,4 +19,4 @@ type = Library
 name = IPO
 parent = Transforms
 library_name = ipo
-required_libraries = AggressiveInstCombine Analysis BitReader BitWriter Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation
+required_libraries = AggressiveInstCombine Analysis BitReader BitWriter Core FrontendOpenMP InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation

diff  --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
new file mode 100644
index 000000000000..1822b5ec1072
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -0,0 +1,419 @@
+//===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// OpenMP specific optimizations:
+//
+// - Deduplication of runtime calls, e.g., omp_get_thread_num.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/OpenMPOpt.h"
+
+#include "llvm/ADT/EnumeratedArray.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/CallGraphUpdater.h"
+
+using namespace llvm;
+using namespace omp;
+using namespace types;
+
+#define DEBUG_TYPE "openmp-opt"
+
+static cl::opt<bool> DisableOpenMPOptimizations(
+    "openmp-opt-disable", cl::ZeroOrMore,
+    cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
+    cl::init(false));
+
+STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
+          "Number of OpenMP runtime calls deduplicated");
+STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
+          "Number of OpenMP runtime functions identified");
+STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
+          "Number of OpenMP runtime function uses identified");
+
+static constexpr auto TAG = "[" DEBUG_TYPE "]";
+
+namespace {
+struct OpenMPOpt {
+
+  OpenMPOpt(SmallPtrSetImpl<Function *> &SCC,
+            SmallPtrSetImpl<Function *> &ModuleSlice,
+            CallGraphUpdater &CGUpdater)
+      : M(*(*SCC.begin())->getParent()), SCC(SCC), ModuleSlice(ModuleSlice),
+        CGUpdater(CGUpdater) {
+    initializeTypes(M);
+    initializeRuntimeFunctions();
+  }
+
+  /// Generic information that describes a runtime function
+  struct RuntimeFunctionInfo {
+    /// The kind, as described by the RuntimeFunction enum.
+    RuntimeFunction Kind;
+
+    /// The name of the function.
+    StringRef Name;
+
+    /// Flag to indicate a variadic function.
+    bool IsVarArg;
+
+    /// The return type of the function.
+    Type *ReturnType;
+
+    /// The argument types of the function.
+    SmallVector<Type *, 8> ArgumentTypes;
+
+    /// The declaration if available.
+    Function *Declaration;
+
+    /// Uses of this runtime function per function containing the use.
+    DenseMap<Function *, SmallPtrSet<Use *, 16>> UsesMap;
+
+    /// Return the number of arguments (or the minimal number for variadic
+    /// functions).
+    size_t getNumArgs() const { return ArgumentTypes.size(); }
+
+    /// Run the callback \p CB on each use and forget the use if the result is
+    /// true. The callback will be fed the function in which the use was
+    /// encountered as second argument.
+    void foreachUse(function_ref<bool(Use &, Function &)> CB) {
+      SmallVector<Use *, 8> ToBeDeleted;
+      for (auto &It : UsesMap) {
+        ToBeDeleted.clear();
+        for (Use *U : It.second)
+          if (CB(*U, *It.first))
+            ToBeDeleted.push_back(U);
+        for (Use *U : ToBeDeleted)
+          It.second.erase(U);
+      }
+    }
+  };
+
+  /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
+  bool run() {
+    bool Changed = false;
+
+    LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
+                      << " functions in a slice with " << ModuleSlice.size()
+                      << " functions\n");
+
+    Changed |= deduplicateRuntimeCalls();
+
+    return Changed;
+  }
+
+private:
+  /// Try to eliminate runtime calls by reusing existing ones.
+  bool deduplicateRuntimeCalls() {
+    bool Changed = false;
+
+    SmallSetVector<Value *, 16> GTIdArgs;
+    collectGlobalThreadIdArguments(GTIdArgs);
+    LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
+                      << " global thread ID arguments\n");
+
+    for (Function *F : SCC) {
+      Value *GTIdArg = nullptr;
+      for (Argument &Arg : F->args())
+        if (GTIdArgs.count(&Arg)) {
+          GTIdArg = &Arg;
+          break;
+        }
+      Changed |= deduplicateRuntimeCalls(
+          *F, RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
+      Changed |= deduplicateRuntimeCalls(*F, RFIs[OMPRTL_omp_get_thread_num]);
+    }
+
+    return Changed;
+  }
+
+  /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or
+  /// \p ReplVal if given.
+  bool deduplicateRuntimeCalls(Function &F, RuntimeFunctionInfo &RFI,
+                               Value *ReplVal = nullptr) {
+    auto &Uses = RFI.UsesMap[&F];
+    if (Uses.size() + (ReplVal != nullptr) < 2)
+      return false;
+
+    LLVM_DEBUG(dbgs() << TAG << "Deduplicate " << Uses.size() << " uses of "
+                      << RFI.Name
+                      << (ReplVal ? " with an existing value\n" : "\n")
+                      << "\n");
+    assert(!ReplVal || (isa<Argument>(ReplVal) &&
+                        cast<Argument>(ReplVal)->getParent() == &F) &&
+                           "Unexpected replacement value!");
+    if (!ReplVal) {
+      for (Use *U : Uses)
+        if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
+          CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
+          ReplVal = CI;
+          break;
+        }
+      if (!ReplVal)
+        return false;
+    }
+
+    bool Changed = false;
+    auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
+      CallInst *CI = getCallIfRegularCall(U, &RFI);
+      if (!CI || CI == ReplVal || &F != &Caller)
+        return false;
+      assert(CI->getCaller() == &F && "Unexpected call!");
+      CGUpdater.removeCallSite(*CI);
+      CI->replaceAllUsesWith(ReplVal);
+      CI->eraseFromParent();
+      ++NumOpenMPRuntimeCallsDeduplicated;
+      Changed = true;
+      return true;
+    };
+    RFI.foreachUse(ReplaceAndDeleteCB);
+
+    return Changed;
+  }
+
+  /// Collect arguments that represent the global thread id in \p GTIdArgs.
+  void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) {
+    // TODO: Below we basically perform a fixpoint iteration with a pessimistic
+    //       initialization. We could define an AbstractAttribute instead and
+    //       run the Attributor here once it can be run as an SCC pass.
+
+    // Helper to check the argument \p ArgNo at all call sites of \p F for
+    // a GTId.
+    auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
+      if (!F.hasLocalLinkage())
+        return false;
+      for (Use &U : F.uses()) {
+        if (CallInst *CI = getCallIfRegularCall(U)) {
+          Value *ArgOp = CI->getArgOperand(ArgNo);
+          if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
+              getCallIfRegularCall(*ArgOp,
+                                   &RFIs[OMPRTL___kmpc_global_thread_num]))
+            continue;
+        }
+        return false;
+      }
+      return true;
+    };
+
+    // Helper to identify uses of a GTId as GTId arguments.
+    auto AddUserArgs = [&](Value &GTId) {
+      for (Use &U : GTId.uses())
+        if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
+          if (CI->isArgOperand(&U))
+            if (Function *Callee = CI->getCalledFunction())
+              if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
+                GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
+    };
+
+    // The argument users of __kmpc_global_thread_num calls are GTIds.
+    RuntimeFunctionInfo &GlobThreadNumRFI =
+        RFIs[OMPRTL___kmpc_global_thread_num];
+    for (auto &It : GlobThreadNumRFI.UsesMap)
+      for (Use *U : It.second)
+        if (CallInst *CI = getCallIfRegularCall(*U, &GlobThreadNumRFI))
+          AddUserArgs(*CI);
+
+    // Transitively search for more arguments by looking at the users of the
+    // ones we know already. During the search the GTIdArgs vector is extended
+    // so we cannot cache the size nor can we use a range based for.
+    for (unsigned u = 0; u < GTIdArgs.size(); ++u)
+      AddUserArgs(*GTIdArgs[u]);
+  }
+
+  /// Return the call if \p U is a callee use in a regular call. If \p RFI is
+  /// given it has to be the callee or a nullptr is returned.
+  CallInst *getCallIfRegularCall(Use &U, RuntimeFunctionInfo *RFI = nullptr) {
+    CallInst *CI = dyn_cast<CallInst>(U.getUser());
+    if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
+        (!RFI || CI->getCalledFunction() == RFI->Declaration))
+      return CI;
+    return nullptr;
+  }
+
+  /// Return the call if \p V is a regular call. If \p RFI is given it has to be
+  /// the callee or a nullptr is returned.
+  CallInst *getCallIfRegularCall(Value &V, RuntimeFunctionInfo *RFI = nullptr) {
+    CallInst *CI = dyn_cast<CallInst>(&V);
+    if (CI && !CI->hasOperandBundles() &&
+        (!RFI || CI->getCalledFunction() == RFI->Declaration))
+      return CI;
+    return nullptr;
+  }
+
+  /// Helper to initialize all runtime function information for those defined in
+  /// OMPKinds.def.
+  void initializeRuntimeFunctions() {
+    // Helper to collect all uses of the declaration in the UsesMap.
+    auto CollectUses = [&](RuntimeFunctionInfo &RFI) {
+      unsigned NumUses = 0;
+      if (!RFI.Declaration)
+        return NumUses;
+
+      NumOpenMPRuntimeFunctionsIdentified += 1;
+      NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
+
+      // TODO: We directly convert uses into proper calls and unknown uses.
+      for (Use &U : RFI.Declaration->uses()) {
+        if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
+          if (ModuleSlice.count(UserI->getFunction())) {
+            RFI.UsesMap[UserI->getFunction()].insert(&U);
+            ++NumUses;
+          }
+        } else {
+          RFI.UsesMap[nullptr].insert(&U);
+          ++NumUses;
+        }
+      }
+      return NumUses;
+    };
+
+#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...)                     \
+  {                                                                            \
+    auto &RFI = RFIs[_Enum];                                                   \
+    RFI.Kind = _Enum;                                                          \
+    RFI.Name = _Name;                                                          \
+    RFI.IsVarArg = _IsVarArg;                                                  \
+    RFI.ReturnType = _ReturnType;                                              \
+    RFI.ArgumentTypes = SmallVector<Type *, 8>({__VA_ARGS__});                 \
+    RFI.Declaration = M.getFunction(_Name);                                    \
+    unsigned NumUses = CollectUses(RFI);                                       \
+    (void)NumUses;                                                             \
+    LLVM_DEBUG({                                                               \
+      dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not")             \
+             << " found\n";                                                    \
+      if (RFI.Declaration)                                                     \
+        dbgs() << TAG << "-> got " << NumUses << " uses in "                   \
+               << RFI.UsesMap.size() << " different functions.\n";             \
+    });                                                                        \
+  }
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+
+    // TODO: We should validate the declaration against the types we expect.
+    // TODO: We should attach the attributes defined in OMPKinds.def.
+  }
+
+  /// The underlying module.
+  Module &M;
+
+  /// The SCC we are operating on.
+  SmallPtrSetImpl<Function *> &SCC;
+
+  /// The slice of the module we are allowed to look at.
+  SmallPtrSetImpl<Function *> &ModuleSlice;
+
+  /// Helper to keep the call graph up to date, e.g., when a call site is
+  /// removed during deduplication.
+  CallGraphUpdater &CGUpdater;
+
+  /// Map from runtime function kind to the runtime function description.
+  EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
+                  RuntimeFunction::OMPRTL___last>
+      RFIs;
+};
+} // namespace
+
+PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C,
+                                     CGSCCAnalysisManager &AM,
+                                     LazyCallGraph &CG, CGSCCUpdateResult &UR) {
+  if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule))
+    return PreservedAnalyses::all();
+
+  if (DisableOpenMPOptimizations)
+    return PreservedAnalyses::all();
+
+  SmallPtrSet<Function *, 16> SCC;
+  for (LazyCallGraph::Node &N : C)
+    SCC.insert(&N.getFunction());
+
+  if (SCC.empty())
+    return PreservedAnalyses::all();
+
+  CallGraphUpdater CGUpdater;
+  CGUpdater.initialize(CG, C, AM, UR);
+  // TODO: Compute the module slice we are allowed to look at.
+  OpenMPOpt OMPOpt(SCC, SCC, CGUpdater);
+  bool Changed = OMPOpt.run();
+  (void)Changed;
+  return PreservedAnalyses::all();
+}
+
+namespace {
+
+struct OpenMPOptLegacyPass : public CallGraphSCCPass {
+  CallGraphUpdater CGUpdater;
+  OpenMPInModule OMPInModule;
+  static char ID;
+
+  OpenMPOptLegacyPass() : CallGraphSCCPass(ID) {
+    initializeOpenMPOptLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    CallGraphSCCPass::getAnalysisUsage(AU);
+  }
+
+  bool doInitialization(CallGraph &CG) override {
+    // Disable the pass if there is no OpenMP (runtime call) in the module.
+    containsOpenMP(CG.getModule(), OMPInModule);
+    return false;
+  }
+
+  bool runOnSCC(CallGraphSCC &CGSCC) override {
+    if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule))
+      return false;
+    if (DisableOpenMPOptimizations || skipSCC(CGSCC))
+      return false;
+
+    SmallPtrSet<Function *, 16> SCC;
+    for (CallGraphNode *CGN : CGSCC)
+      if (Function *Fn = CGN->getFunction())
+        if (!Fn->isDeclaration())
+          SCC.insert(Fn);
+
+    if (SCC.empty())
+      return false;
+
+    CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+    CGUpdater.initialize(CG, CGSCC);
+
+    // TODO: Compute the module slice we are allowed to look at.
+    OpenMPOpt OMPOpt(SCC, SCC, CGUpdater);
+    return OMPOpt.run();
+  }
+
+  bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
+};
+
+} // end anonymous namespace
+
+bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) {
+  if (OMPInModule.isKnown())
+    return OMPInModule;
+
+#define OMP_RTL(_Enum, _Name, ...)                                             \
+  if (M.getFunction(_Name))                                                    \
+    return OMPInModule = true;
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+  return OMPInModule = false;
+}
+
+char OpenMPOptLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(OpenMPOptLegacyPass, "openmpopt",
+                      "OpenMP specific optimizations", false, false)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_END(OpenMPOptLegacyPass, "openmpopt",
+                    "OpenMP specific optimizations", false, false)
+
+Pass *llvm::createOpenMPOptLegacyPass() { return new OpenMPOptLegacyPass(); }

diff  --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 7cfc29f7bf7a..86ea65fe8e30 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -599,6 +599,11 @@ void PassManagerBuilder::populateModulePassManager(
     RunInliner = true;
   }
 
+  // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
+  // there are no OpenMP runtime calls present in the module.
+  if (OptLevel > 1)
+    MPM.add(createOpenMPOptLegacyPass());
+
   MPM.add(createPostOrderFunctionAttrsLegacyPass());
   if (OptLevel > 2)
     MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
@@ -930,6 +935,11 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
   // CSFDO instrumentation and use pass.
   addPGOInstrPasses(PM, /* IsCS */ true);
 
+  // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
+  // there are no OpenMP runtime calls present in the module.
+  if (OptLevel > 1)
+    PM.add(createOpenMPOptLegacyPass());
+
   // Optimize globals again if we ran the inliner.
   if (RunInliner)
     PM.add(createGlobalOptimizerPass());

diff  --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index 1dc96ef3a14c..a386272428d8 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -142,6 +142,7 @@
 ; CHECK-O-NEXT: Running pass: InlinerPass
 ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
 ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
+; CHECK-O-NEXT: Running pass: OpenMPOptPass on (foo)
 ; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}>
 ; CHECK-O-NEXT: Starting llvm::Function pass manager run.
 ; CHECK-O-NEXT: Running pass: SROA

diff  --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll
index 48d59dd6aa77..e44f206e299d 100644
--- a/llvm/test/Other/new-pm-thinlto-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-defaults.ll
@@ -107,6 +107,7 @@
 ; CHECK-O-NEXT: Running pass: InlinerPass
 ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
 ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
+; CHECK-O-NEXT: Running pass: OpenMPOptPass on (foo)
 ; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}>
 ; CHECK-O-NEXT: Starting llvm::Function pass manager run.
 ; CHECK-O-NEXT: Running pass: SROA

diff  --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll
index 37ffab4bee5c..2e42119c6c52 100644
--- a/llvm/test/Other/opt-O2-pipeline.ll
+++ b/llvm/test/Other/opt-O2-pipeline.ll
@@ -59,6 +59,7 @@
 ; CHECK-NEXT:     Call Graph SCC Pass Manager
 ; CHECK-NEXT:       Remove unused exception handling info
 ; CHECK-NEXT:       Function Integration/Inlining
+; CHECK-NEXT:       OpenMP specific optimizations
 ; CHECK-NEXT:       Deduce function attributes
 ; CHECK-NEXT:       FunctionPass Manager
 ; CHECK-NEXT:         Dominator Tree Construction

diff  --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll
index 3deea84eb069..57963580be54 100644
--- a/llvm/test/Other/opt-O3-pipeline.ll
+++ b/llvm/test/Other/opt-O3-pipeline.ll
@@ -62,6 +62,7 @@
 ; CHECK-NEXT:     Call Graph SCC Pass Manager
 ; CHECK-NEXT:       Remove unused exception handling info
 ; CHECK-NEXT:       Function Integration/Inlining
+; CHECK-NEXT:       OpenMP specific optimizations
 ; CHECK-NEXT:       Deduce function attributes
 ; CHECK-NEXT:       Promote 'by reference' arguments to scalars
 ; CHECK-NEXT:       FunctionPass Manager

diff  --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll
index 59b0720fd0e8..84172237b93c 100644
--- a/llvm/test/Other/opt-Os-pipeline.ll
+++ b/llvm/test/Other/opt-Os-pipeline.ll
@@ -59,6 +59,7 @@
 ; CHECK-NEXT:     Call Graph SCC Pass Manager
 ; CHECK-NEXT:       Remove unused exception handling info
 ; CHECK-NEXT:       Function Integration/Inlining
+; CHECK-NEXT:       OpenMP specific optimizations
 ; CHECK-NEXT:       Deduce function attributes
 ; CHECK-NEXT:       FunctionPass Manager
 ; CHECK-NEXT:         Dominator Tree Construction

diff  --git a/llvm/test/Other/pass-pipelines.ll b/llvm/test/Other/pass-pipelines.ll
index 6853fd9cbab3..9be9823a5dc8 100644
--- a/llvm/test/Other/pass-pipelines.ll
+++ b/llvm/test/Other/pass-pipelines.ll
@@ -46,6 +46,7 @@
 ; CHECK-O2-NEXT: Call Graph SCC Pass Manager
 ; CHECK-O2-NEXT: Remove unused exception handling info
 ; CHECK-O2-NEXT: Function Integration/Inlining
+; CHECK-O2-NEXT: OpenMP specific optimizations
 ; CHECK-O2-NEXT: Deduce function attributes
 ; Next up is the main function pass pipeline. It shouldn't be split up and
 ; should contain the main loop pass pipeline as well.

diff  --git a/llvm/test/Transforms/OpenMP/gtid.ll b/llvm/test/Transforms/OpenMP/gtid.ll
new file mode 100644
index 000000000000..93a72ca86ec8
--- /dev/null
+++ b/llvm/test/Transforms/OpenMP/gtid.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
+; RUN: opt -openmpopt -S < %s | FileCheck %s
+; RUN: opt -passes=openmpopt -S < %s | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+%struct.ident_t = type { i32, i32, i32, i32, i8* }
+
+@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8
+@.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
+
+declare i32 @__kmpc_global_thread_num(%struct.ident_t*)
+declare void @useI32(i32)
+
+define void @external(i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@external
+; CHECK-SAME: (i1 [[C:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C2:%.*]] = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[E:%.*]]
+; CHECK:       t:
+; CHECK-NEXT:    call void @internal(i32 [[C2]], i32 [[C2]])
+; CHECK-NEXT:    call void @useI32(i32 [[C2]])
+; CHECK-NEXT:    br label [[M:%.*]]
+; CHECK:       e:
+; CHECK-NEXT:    call void @internal(i32 [[C2]], i32 [[C2]])
+; CHECK-NEXT:    call void @useI32(i32 [[C2]])
+; CHECK-NEXT:    br label [[M]]
+; CHECK:       m:
+; CHECK-NEXT:    call void @internal(i32 0, i32 [[C2]])
+; CHECK-NEXT:    call void @useI32(i32 [[C2]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %c, label %t, label %e
+t:
+  %c0 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  call void @internal(i32 %c0, i32 %c0)
+  call void @useI32(i32 %c0)
+  br label %m
+e:
+  %c1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  call void @internal(i32 %c1, i32 %c1)
+  call void @useI32(i32 %c1)
+  br label %m
+m:
+  %c2 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  call void @internal(i32 0, i32 %c2)
+  call void @useI32(i32 %c2)
+  ret void
+}
+
+define internal void @internal(i32 %not_gtid, i32 %gtid) {
+; CHECK-LABEL: define {{[^@]+}}@internal
+; CHECK-SAME: (i32 [[NOT_GTID:%.*]], i32 [[GTID:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[GTID]], [[GTID]]
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[E:%.*]]
+; CHECK:       t:
+; CHECK-NEXT:    call void @useI32(i32 [[GTID]])
+; CHECK-NEXT:    call void @external(i1 [[C]])
+; CHECK-NEXT:    br label [[M:%.*]]
+; CHECK:       e:
+; CHECK-NEXT:    call void @useI32(i32 [[GTID]])
+; CHECK-NEXT:    br label [[M]]
+; CHECK:       m:
+; CHECK-NEXT:    call void @useI32(i32 [[GTID]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cc = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  %c = icmp eq i32 %cc, %gtid
+  br i1 %c, label %t, label %e
+t:
+  %c0 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  call void @useI32(i32 %c0)
+  call void @external(i1 %c)
+  br label %m
+e:
+  %c1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  call void @useI32(i32 %c1)
+  br label %m
+m:
+  %c2 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  call void @useI32(i32 %c2)
+  ret void
+}


        


More information about the llvm-commits mailing list