[llvm] c3ddc13 - [NFC] Split up PassBuilder.cpp

Arthur Eubanks via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 15 15:40:05 PDT 2021


Author: Arthur Eubanks
Date: 2021-09-15T15:30:39-07:00
New Revision: c3ddc13d7d631a29b55f82c7cc7a9008bf89b1f4

URL: https://github.com/llvm/llvm-project/commit/c3ddc13d7d631a29b55f82c7cc7a9008bf89b1f4
DIFF: https://github.com/llvm/llvm-project/commit/c3ddc13d7d631a29b55f82c7cc7a9008bf89b1f4.diff

LOG: [NFC] Split up PassBuilder.cpp

PassBuilder.cpp is the slowest file to compile in LLVM.
When trying to test changes to pipelines, it takes a long time to recompile.

This doesn't actually speedup building PassBuilder.cpp itself since most
of the time is spent in other large/duplicated functions caused by
PassRegistry.def.

Reviewed By: asbirlea

Differential Revision: https://reviews.llvm.org/D109798

Added: 
    llvm/lib/Passes/OptimizationLevel.cpp
    llvm/lib/Passes/PassBuilderPipelines.cpp

Modified: 
    llvm/lib/Passes/CMakeLists.txt
    llvm/lib/Passes/PassBuilder.cpp
    llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Passes/CMakeLists.txt b/llvm/lib/Passes/CMakeLists.txt
index 89e960f01e2eb..703969f8b5f40 100644
--- a/llvm/lib/Passes/CMakeLists.txt
+++ b/llvm/lib/Passes/CMakeLists.txt
@@ -1,6 +1,8 @@
 add_llvm_component_library(LLVMPasses
+  OptimizationLevel.cpp
   PassBuilder.cpp
   PassBuilderBindings.cpp
+  PassBuilderPipelines.cpp
   PassPlugin.cpp
   StandardInstrumentations.cpp
 

diff  --git a/llvm/lib/Passes/OptimizationLevel.cpp b/llvm/lib/Passes/OptimizationLevel.cpp
new file mode 100644
index 0000000000000..a1f8c1e14b1f0
--- /dev/null
+++ b/llvm/lib/Passes/OptimizationLevel.cpp
@@ -0,0 +1,30 @@
+//===- OptimizationLevel.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Passes/OptimizationLevel.h"
+
+using namespace llvm;
+
+const OptimizationLevel OptimizationLevel::O0 = {
+    /*SpeedLevel*/ 0,
+    /*SizeLevel*/ 0};
+const OptimizationLevel OptimizationLevel::O1 = {
+    /*SpeedLevel*/ 1,
+    /*SizeLevel*/ 0};
+const OptimizationLevel OptimizationLevel::O2 = {
+    /*SpeedLevel*/ 2,
+    /*SizeLevel*/ 0};
+const OptimizationLevel OptimizationLevel::O3 = {
+    /*SpeedLevel*/ 3,
+    /*SizeLevel*/ 0};
+const OptimizationLevel OptimizationLevel::Os = {
+    /*SpeedLevel*/ 2,
+    /*SizeLevel*/ 1};
+const OptimizationLevel OptimizationLevel::Oz = {
+    /*SpeedLevel*/ 2,
+    /*SizeLevel*/ 2};

diff  --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 076ff95cb8d21..a868f103f9327 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1,4 +1,4 @@
-//===- Parsing, selection, and construction of pass pipelines -------------===//
+//===- Parsing and selection of pass pipelines ----------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -242,108 +242,16 @@
 
 using namespace llvm;
 
-static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
-    "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
-    cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
-    cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
-                          "Heuristics-based inliner version."),
-               clEnumValN(InliningAdvisorMode::Development, "development",
-                          "Use development mode (runtime-loadable model)."),
-               clEnumValN(InliningAdvisorMode::Release, "release",
-                          "Use release mode (AOT-compiled model).")));
-
-static cl::opt<bool> EnableSyntheticCounts(
-    "enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore,
-    cl::desc("Run synthetic function entry count generation "
-             "pass"));
-
 static const Regex DefaultAliasRegex(
     "^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$");
 
-/// Flag to enable inline deferral during PGO.
-static cl::opt<bool>
-    EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
-                            cl::Hidden,
-                            cl::desc("Enable inline deferral during PGO"));
-
-static cl::opt<bool> EnableMemProfiler("enable-mem-prof", cl::init(false),
-                                       cl::Hidden, cl::ZeroOrMore,
-                                       cl::desc("Enable memory profiler"));
-
-static cl::opt<bool> PerformMandatoryInliningsFirst(
-    "mandatory-inlining-first", cl::init(true), cl::Hidden, cl::ZeroOrMore,
-    cl::desc("Perform mandatory inlinings module-wide, before performing "
-             "inlining."));
-
-static cl::opt<bool> EnableO3NonTrivialUnswitching(
-    "enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden,
-    cl::ZeroOrMore, cl::desc("Enable non-trivial loop unswitching for -O3"));
-
-PipelineTuningOptions::PipelineTuningOptions() {
-  LoopInterleaving = true;
-  LoopVectorization = true;
-  SLPVectorization = false;
-  LoopUnrolling = true;
-  ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
-  LicmMssaOptCap = SetLicmMssaOptCap;
-  LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
-  CallGraphProfile = true;
-  MergeFunctions = false;
-}
-
 namespace llvm {
 cl::opt<bool> PrintPipelinePasses(
     "print-pipeline-passes",
     cl::desc("Print a '-passes' compatible string describing the pipeline "
              "(best-effort only)."));
-
-extern cl::opt<unsigned> MaxDevirtIterations;
-extern cl::opt<bool> EnableConstraintElimination;
-extern cl::opt<bool> EnableFunctionSpecialization;
-extern cl::opt<bool> EnableGVNHoist;
-extern cl::opt<bool> EnableGVNSink;
-extern cl::opt<bool> EnableHotColdSplit;
-extern cl::opt<bool> EnableIROutliner;
-extern cl::opt<bool> EnableOrderFileInstrumentation;
-extern cl::opt<bool> EnableCHR;
-extern cl::opt<bool> EnableLoopInterchange;
-extern cl::opt<bool> EnableUnrollAndJam;
-extern cl::opt<bool> EnableLoopFlatten;
-extern cl::opt<bool> EnableDFAJumpThreading;
-extern cl::opt<bool> RunNewGVN;
-extern cl::opt<bool> RunPartialInlining;
-extern cl::opt<bool> ExtraVectorizerPasses;
-
-extern cl::opt<bool> FlattenedProfileUsed;
-
-extern cl::opt<AttributorRunOption> AttributorRun;
-extern cl::opt<bool> EnableKnowledgeRetention;
-
-extern cl::opt<bool> EnableMatrix;
-
-extern cl::opt<bool> DisablePreInliner;
-extern cl::opt<int> PreInlineThreshold;
 } // namespace llvm
 
-const OptimizationLevel OptimizationLevel::O0 = {
-    /*SpeedLevel*/ 0,
-    /*SizeLevel*/ 0};
-const OptimizationLevel OptimizationLevel::O1 = {
-    /*SpeedLevel*/ 1,
-    /*SizeLevel*/ 0};
-const OptimizationLevel OptimizationLevel::O2 = {
-    /*SpeedLevel*/ 2,
-    /*SizeLevel*/ 0};
-const OptimizationLevel OptimizationLevel::O3 = {
-    /*SpeedLevel*/ 3,
-    /*SizeLevel*/ 0};
-const OptimizationLevel OptimizationLevel::Os = {
-    /*SpeedLevel*/ 2,
-    /*SizeLevel*/ 1};
-const OptimizationLevel OptimizationLevel::Oz = {
-    /*SpeedLevel*/ 2,
-    /*SizeLevel*/ 2};
-
 namespace {
 
 // The following passes/analyses have custom names, otherwise their name will
@@ -484,12 +392,6 @@ PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO,
   }
 }
 
-void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
-                                            OptimizationLevel Level) {
-  for (auto &C : PeepholeEPCallbacks)
-    C(FPM, Level);
-}
-
 void PassBuilder::registerModuleAnalyses(ModuleAnalysisManager &MAM) {
 #define MODULE_ANALYSIS(NAME, CREATE_PASS)                                     \
   MAM.registerPass([&] { return CREATE_PASS; });
@@ -526,1525 +428,6 @@ void PassBuilder::registerLoopAnalyses(LoopAnalysisManager &LAM) {
     C(LAM);
 }
 
-// Helper to add AnnotationRemarksPass.
-static void addAnnotationRemarksPass(ModulePassManager &MPM) {
-  FunctionPassManager FPM;
-  FPM.addPass(AnnotationRemarksPass());
-  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
-}
-
-// Helper to check if the current compilation phase is preparing for LTO
-static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
-  return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
-         Phase == ThinOrFullLTOPhase::FullLTOPreLink;
-}
-
-// TODO: Investigate the cost/benefit of tail call elimination on debugging.
-FunctionPassManager
-PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
-                                                   ThinOrFullLTOPhase Phase) {
-
-  FunctionPassManager FPM;
-
-  // Form SSA out of local memory accesses after breaking apart aggregates into
-  // scalars.
-  FPM.addPass(SROA());
-
-  // Catch trivial redundancies
-  FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
-
-  // Hoisting of scalars and load expressions.
-  FPM.addPass(SimplifyCFGPass());
-  FPM.addPass(InstCombinePass());
-
-  FPM.addPass(LibCallsShrinkWrapPass());
-
-  invokePeepholeEPCallbacks(FPM, Level);
-
-  FPM.addPass(SimplifyCFGPass());
-
-  // Form canonically associated expression trees, and simplify the trees using
-  // basic mathematical properties. For example, this will form (nearly)
-  // minimal multiplication trees.
-  FPM.addPass(ReassociatePass());
-
-  // Add the primary loop simplification pipeline.
-  // FIXME: Currently this is split into two loop pass pipelines because we run
-  // some function passes in between them. These can and should be removed
-  // and/or replaced by scheduling the loop pass equivalents in the correct
-  // positions. But those equivalent passes aren't powerful enough yet.
-  // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
-  // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
-  // fully replace `SimplifyCFGPass`, and the closest to the other we have is
-  // `LoopInstSimplify`.
-  LoopPassManager LPM1, LPM2;
-
-  // Simplify the loop body. We do this initially to clean up after other loop
-  // passes run, either when iterating on a loop or on inner loops with
-  // implications on the outer loop.
-  LPM1.addPass(LoopInstSimplifyPass());
-  LPM1.addPass(LoopSimplifyCFGPass());
-
-  // Try to remove as much code from the loop header as possible,
-  // to reduce amount of IR that will have to be duplicated.
-  // TODO: Investigate promotion cap for O1.
-  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
-
-  LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
-                              isLTOPreLink(Phase)));
-  // TODO: Investigate promotion cap for O1.
-  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
-  LPM1.addPass(SimpleLoopUnswitchPass());
-
-  LPM2.addPass(LoopIdiomRecognizePass());
-  LPM2.addPass(IndVarSimplifyPass());
-
-  for (auto &C : LateLoopOptimizationsEPCallbacks)
-    C(LPM2, Level);
-
-  LPM2.addPass(LoopDeletionPass());
-
-  if (EnableLoopInterchange)
-    LPM2.addPass(LoopInterchangePass());
-
-  // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
-  // because it changes IR to makes profile annotation in back compile
-  // inaccurate. The normal unroller doesn't pay attention to forced full unroll
-  // attributes so we need to make sure and allow the full unroll pass to pay
-  // attention to it.
-  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
-      PGOOpt->Action != PGOOptions::SampleUse)
-    LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
-                                    /* OnlyWhenForced= */ !PTO.LoopUnrolling,
-                                    PTO.ForgetAllSCEVInLoopUnroll));
-
-  for (auto &C : LoopOptimizerEndEPCallbacks)
-    C(LPM2, Level);
-
-  // We provide the opt remark emitter pass for LICM to use. We only need to do
-  // this once as it is immutable.
-  FPM.addPass(
-      RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
-  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
-                                              /*UseMemorySSA=*/true,
-                                              /*UseBlockFrequencyInfo=*/true));
-  FPM.addPass(SimplifyCFGPass());
-  FPM.addPass(InstCombinePass());
-  if (EnableLoopFlatten)
-    FPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
-  // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
-  // *All* loop passes must preserve it, in order to be able to use it.
-  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
-                                              /*UseMemorySSA=*/false,
-                                              /*UseBlockFrequencyInfo=*/false));
-
-  // Delete small array after loop unroll.
-  FPM.addPass(SROA());
-
-  // Specially optimize memory movement as it doesn't look like dataflow in SSA.
-  FPM.addPass(MemCpyOptPass());
-
-  // Sparse conditional constant propagation.
-  // FIXME: It isn't clear why we do this *after* loop passes rather than
-  // before...
-  FPM.addPass(SCCPPass());
-
-  // Delete dead bit computations (instcombine runs after to fold away the dead
-  // computations, and then ADCE will run later to exploit any new DCE
-  // opportunities that creates).
-  FPM.addPass(BDCEPass());
-
-  // Run instcombine after redundancy and dead bit elimination to exploit
-  // opportunities opened up by them.
-  FPM.addPass(InstCombinePass());
-  invokePeepholeEPCallbacks(FPM, Level);
-
-  FPM.addPass(CoroElidePass());
-
-  for (auto &C : ScalarOptimizerLateEPCallbacks)
-    C(FPM, Level);
-
-  // Finally, do an expensive DCE pass to catch all the dead code exposed by
-  // the simplifications and basic cleanup after all the simplifications.
-  // TODO: Investigate if this is too expensive.
-  FPM.addPass(ADCEPass());
-  FPM.addPass(SimplifyCFGPass());
-  FPM.addPass(InstCombinePass());
-  invokePeepholeEPCallbacks(FPM, Level);
-
-  return FPM;
-}
-
-FunctionPassManager
-PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
-                                                 ThinOrFullLTOPhase Phase) {
-  assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
-
-  // The O1 pipeline has a separate pipeline creation function to simplify
-  // construction readability.
-  if (Level.getSpeedupLevel() == 1)
-    return buildO1FunctionSimplificationPipeline(Level, Phase);
-
-  FunctionPassManager FPM;
-
-  // Form SSA out of local memory accesses after breaking apart aggregates into
-  // scalars.
-  FPM.addPass(SROA());
-
-  // Catch trivial redundancies
-  FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
-  if (EnableKnowledgeRetention)
-    FPM.addPass(AssumeSimplifyPass());
-
-  // Hoisting of scalars and load expressions.
-  if (EnableGVNHoist)
-    FPM.addPass(GVNHoistPass());
-
-  // Global value numbering based sinking.
-  if (EnableGVNSink) {
-    FPM.addPass(GVNSinkPass());
-    FPM.addPass(SimplifyCFGPass());
-  }
-
-  if (EnableConstraintElimination)
-    FPM.addPass(ConstraintEliminationPass());
-
-  // Speculative execution if the target has divergent branches; otherwise nop.
-  FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
-
-  // Optimize based on known information about branches, and cleanup afterward.
-  FPM.addPass(JumpThreadingPass());
-  FPM.addPass(CorrelatedValuePropagationPass());
-
-  FPM.addPass(SimplifyCFGPass());
-  if (Level == OptimizationLevel::O3)
-    FPM.addPass(AggressiveInstCombinePass());
-  FPM.addPass(InstCombinePass());
-
-  if (!Level.isOptimizingForSize())
-    FPM.addPass(LibCallsShrinkWrapPass());
-
-  invokePeepholeEPCallbacks(FPM, Level);
-
-  // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
-  // using the size value profile. Don't perform this when optimizing for size.
-  if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
-      !Level.isOptimizingForSize())
-    FPM.addPass(PGOMemOPSizeOpt());
-
-  FPM.addPass(TailCallElimPass());
-  FPM.addPass(SimplifyCFGPass());
-
-  // Form canonically associated expression trees, and simplify the trees using
-  // basic mathematical properties. For example, this will form (nearly)
-  // minimal multiplication trees.
-  FPM.addPass(ReassociatePass());
-
-  // Add the primary loop simplification pipeline.
-  // FIXME: Currently this is split into two loop pass pipelines because we run
-  // some function passes in between them. These can and should be removed
-  // and/or replaced by scheduling the loop pass equivalents in the correct
-  // positions. But those equivalent passes aren't powerful enough yet.
-  // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
-  // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
-  // fully replace `SimplifyCFGPass`, and the closest to the other we have is
-  // `LoopInstSimplify`.
-  LoopPassManager LPM1, LPM2;
-
-  // Simplify the loop body. We do this initially to clean up after other loop
-  // passes run, either when iterating on a loop or on inner loops with
-  // implications on the outer loop.
-  LPM1.addPass(LoopInstSimplifyPass());
-  LPM1.addPass(LoopSimplifyCFGPass());
-
-  // Try to remove as much code from the loop header as possible,
-  // to reduce amount of IR that will have to be duplicated.
-  // TODO: Investigate promotion cap for O1.
-  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
-
-  // Disable header duplication in loop rotation at -Oz.
-  LPM1.addPass(
-      LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
-  // TODO: Investigate promotion cap for O1.
-  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
-  LPM1.addPass(
-      SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&
-                             EnableO3NonTrivialUnswitching));
-  LPM2.addPass(LoopIdiomRecognizePass());
-  LPM2.addPass(IndVarSimplifyPass());
-
-  for (auto &C : LateLoopOptimizationsEPCallbacks)
-    C(LPM2, Level);
-
-  LPM2.addPass(LoopDeletionPass());
-
-  if (EnableLoopInterchange)
-    LPM2.addPass(LoopInterchangePass());
-
-  // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
-  // because it changes IR to makes profile annotation in back compile
-  // inaccurate. The normal unroller doesn't pay attention to forced full unroll
-  // attributes so we need to make sure and allow the full unroll pass to pay
-  // attention to it.
-  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
-      PGOOpt->Action != PGOOptions::SampleUse)
-    LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
-                                    /* OnlyWhenForced= */ !PTO.LoopUnrolling,
-                                    PTO.ForgetAllSCEVInLoopUnroll));
-
-  for (auto &C : LoopOptimizerEndEPCallbacks)
-    C(LPM2, Level);
-
-  // We provide the opt remark emitter pass for LICM to use. We only need to do
-  // this once as it is immutable.
-  FPM.addPass(
-      RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
-  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
-                                              /*UseMemorySSA=*/true,
-                                              /*UseBlockFrequencyInfo=*/true));
-  FPM.addPass(SimplifyCFGPass());
-  FPM.addPass(InstCombinePass());
-  if (EnableLoopFlatten)
-    FPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
-  // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
-  // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
-  // *All* loop passes must preserve it, in order to be able to use it.
-  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
-                                              /*UseMemorySSA=*/false,
-                                              /*UseBlockFrequencyInfo=*/false));
-
-  // Delete small array after loop unroll.
-  FPM.addPass(SROA());
-
-  // Eliminate redundancies.
-  FPM.addPass(MergedLoadStoreMotionPass());
-  if (RunNewGVN)
-    FPM.addPass(NewGVNPass());
-  else
-    FPM.addPass(GVN());
-
-  // Sparse conditional constant propagation.
-  // FIXME: It isn't clear why we do this *after* loop passes rather than
-  // before...
-  FPM.addPass(SCCPPass());
-
-  // Delete dead bit computations (instcombine runs after to fold away the dead
-  // computations, and then ADCE will run later to exploit any new DCE
-  // opportunities that creates).
-  FPM.addPass(BDCEPass());
-
-  // Run instcombine after redundancy and dead bit elimination to exploit
-  // opportunities opened up by them.
-  FPM.addPass(InstCombinePass());
-  invokePeepholeEPCallbacks(FPM, Level);
-
-  // Re-consider control flow based optimizations after redundancy elimination,
-  // redo DCE, etc.
-  if (EnableDFAJumpThreading && Level.getSizeLevel() == 0)
-    FPM.addPass(DFAJumpThreadingPass());
-
-  FPM.addPass(JumpThreadingPass());
-  FPM.addPass(CorrelatedValuePropagationPass());
-
-  // Finally, do an expensive DCE pass to catch all the dead code exposed by
-  // the simplifications and basic cleanup after all the simplifications.
-  // TODO: Investigate if this is too expensive.
-  FPM.addPass(ADCEPass());
-
-  // Specially optimize memory movement as it doesn't look like dataflow in SSA.
-  FPM.addPass(MemCpyOptPass());
-
-  FPM.addPass(DSEPass());
-  FPM.addPass(createFunctionToLoopPassAdaptor(
-      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
-      /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
-
-  FPM.addPass(CoroElidePass());
-
-  for (auto &C : ScalarOptimizerLateEPCallbacks)
-    C(FPM, Level);
-
-  FPM.addPass(SimplifyCFGPass(
-      SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true)));
-  FPM.addPass(InstCombinePass());
-  invokePeepholeEPCallbacks(FPM, Level);
-
-  if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt &&
-      (PGOOpt->Action == PGOOptions::IRUse ||
-       PGOOpt->Action == PGOOptions::SampleUse))
-    FPM.addPass(ControlHeightReductionPass());
-
-  return FPM;
-}
-
-void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
-  MPM.addPass(CanonicalizeAliasesPass());
-  MPM.addPass(NameAnonGlobalPass());
-}
-
-void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
-                                    OptimizationLevel Level, bool RunProfileGen,
-                                    bool IsCS, std::string ProfileFile,
-                                    std::string ProfileRemappingFile) {
-  assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
-  if (!IsCS && !DisablePreInliner) {
-    InlineParams IP;
-
-    IP.DefaultThreshold = PreInlineThreshold;
-
-    // FIXME: The hint threshold has the same value used by the regular inliner
-    // when not optimzing for size. This should probably be lowered after
-    // performance testing.
-    // FIXME: this comment is cargo culted from the old pass manager, revisit).
-    IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
-    ModuleInlinerWrapperPass MIWP(IP);
-    CGSCCPassManager &CGPipeline = MIWP.getPM();
-
-    FunctionPassManager FPM;
-    FPM.addPass(SROA());
-    FPM.addPass(EarlyCSEPass());    // Catch trivial redundancies.
-    FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks.
-    FPM.addPass(InstCombinePass()); // Combine silly sequences.
-    invokePeepholeEPCallbacks(FPM, Level);
-
-    CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
-
-    MPM.addPass(std::move(MIWP));
-
-    // Delete anything that is now dead to make sure that we don't instrument
-    // dead code. Instrumentation can end up keeping dead code around and
-    // dramatically increase code size.
-    MPM.addPass(GlobalDCEPass());
-  }
-
-  if (!RunProfileGen) {
-    assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
-    MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
-    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
-    // RequireAnalysisPass for PSI before subsequent non-module passes.
-    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
-    return;
-  }
-
-  // Perform PGO instrumentation.
-  MPM.addPass(PGOInstrumentationGen(IsCS));
-
-  FunctionPassManager FPM;
-  // Disable header duplication in loop rotation at -Oz.
-  FPM.addPass(createFunctionToLoopPassAdaptor(
-      LoopRotatePass(Level != OptimizationLevel::Oz), /*UseMemorySSA=*/false,
-      /*UseBlockFrequencyInfo=*/false));
-  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
-
-  // Add the profile lowering pass.
-  InstrProfOptions Options;
-  if (!ProfileFile.empty())
-    Options.InstrProfileOutput = ProfileFile;
-  // Do counter promotion at Level greater than O0.
-  Options.DoCounterPromotion = true;
-  Options.UseBFIInPromotion = IsCS;
-  MPM.addPass(InstrProfiling(Options, IsCS));
-}
-
-void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM,
-                                         bool RunProfileGen, bool IsCS,
-                                         std::string ProfileFile,
-                                         std::string ProfileRemappingFile) {
-  if (!RunProfileGen) {
-    assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
-    MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
-    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
-    // RequireAnalysisPass for PSI before subsequent non-module passes.
-    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
-    return;
-  }
-
-  // Perform PGO instrumentation.
-  MPM.addPass(PGOInstrumentationGen(IsCS));
-  // Add the profile lowering pass.
-  InstrProfOptions Options;
-  if (!ProfileFile.empty())
-    Options.InstrProfileOutput = ProfileFile;
-  // Do not do counter promotion at O0.
-  Options.DoCounterPromotion = false;
-  Options.UseBFIInPromotion = IsCS;
-  MPM.addPass(InstrProfiling(Options, IsCS));
-}
-
-static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) {
-  return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
-}
-
-ModuleInlinerWrapperPass
-PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
-                                  ThinOrFullLTOPhase Phase) {
-  InlineParams IP = getInlineParamsFromOptLevel(Level);
-  if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
-      PGOOpt->Action == PGOOptions::SampleUse)
-    IP.HotCallSiteThreshold = 0;
-
-  if (PGOOpt)
-    IP.EnableDeferral = EnablePGOInlineDeferral;
-
-  ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
-                                UseInlineAdvisor, MaxDevirtIterations);
-
-  // Require the GlobalsAA analysis for the module so we can query it within
-  // the CGSCC pipeline.
-  MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
-  // Invalidate AAManager so it can be recreated and pick up the newly available
-  // GlobalsAA.
-  MIWP.addModulePass(
-      createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
-
-  // Require the ProfileSummaryAnalysis for the module so we can query it within
-  // the inliner pass.
-  MIWP.addModulePass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
-
-  // Now begin the main postorder CGSCC pipeline.
-  // FIXME: The current CGSCC pipeline has its origins in the legacy pass
-  // manager and trying to emulate its precise behavior. Much of this doesn't
-  // make a lot of sense and we should revisit the core CGSCC structure.
-  CGSCCPassManager &MainCGPipeline = MIWP.getPM();
-
-  // Note: historically, the PruneEH pass was run first to deduce nounwind and
-  // generally clean up exception handling overhead. It isn't clear this is
-  // valuable as the inliner doesn't currently care whether it is inlining an
-  // invoke or a call.
-
-  if (AttributorRun & AttributorRunOption::CGSCC)
-    MainCGPipeline.addPass(AttributorCGSCCPass());
-
-  // Now deduce any function attributes based in the current code.
-  MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
-
-  // When at O3 add argument promotion to the pass pipeline.
-  // FIXME: It isn't at all clear why this should be limited to O3.
-  if (Level == OptimizationLevel::O3)
-    MainCGPipeline.addPass(ArgumentPromotionPass());
-
-  // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
-  // there are no OpenMP runtime calls present in the module.
-  if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
-    MainCGPipeline.addPass(OpenMPOptCGSCCPass());
-
-  for (auto &C : CGSCCOptimizerLateEPCallbacks)
-    C(MainCGPipeline, Level);
-
-  // Lastly, add the core function simplification pipeline nested inside the
-  // CGSCC walk.
-  MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
-      buildFunctionSimplificationPipeline(Level, Phase)));
-
-  MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
-
-  return MIWP;
-}
-
-ModulePassManager
-PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
-                                               ThinOrFullLTOPhase Phase) {
-  ModulePassManager MPM;
-
-  // Place pseudo probe instrumentation as the first pass of the pipeline to
-  // minimize the impact of optimization changes.
-  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
-      Phase != ThinOrFullLTOPhase::ThinLTOPostLink)
-    MPM.addPass(SampleProfileProbePass(TM));
-
-  bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
-
-  // In ThinLTO mode, when flattened profile is used, all the available
-  // profile information will be annotated in PreLink phase so there is
-  // no need to load the profile again in PostLink.
-  bool LoadSampleProfile =
-      HasSampleProfile &&
-      !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink);
-
-  // During the ThinLTO backend phase we perform early indirect call promotion
-  // here, before globalopt. Otherwise imported available_externally functions
-  // look unreferenced and are removed. If we are going to load the sample
-  // profile then defer until later.
-  // TODO: See if we can move later and consolidate with the location where
-  // we perform ICP when we are loading a sample profile.
-  // TODO: We pass HasSampleProfile (whether there was a sample profile file
-  // passed to the compile) to the SamplePGO flag of ICP. This is used to
-  // determine whether the new direct calls are annotated with prof metadata.
-  // Ideally this should be determined from whether the IR is annotated with
-  // sample profile, and not whether the a sample profile was provided on the
-  // command line. E.g. for flattened profiles where we will not be reloading
-  // the sample profile in the ThinLTO backend, we ideally shouldn't have to
-  // provide the sample profile file.
-  if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
-    MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
-
-  // Do basic inference of function attributes from known properties of system
-  // libraries and other oracles.
-  MPM.addPass(InferFunctionAttrsPass());
-
-  // Create an early function pass manager to cleanup the output of the
-  // frontend.
-  FunctionPassManager EarlyFPM;
-  // Lower llvm.expect to metadata before attempting transforms.
-  // Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
-  EarlyFPM.addPass(LowerExpectIntrinsicPass());
-  EarlyFPM.addPass(SimplifyCFGPass());
-  EarlyFPM.addPass(SROA());
-  EarlyFPM.addPass(EarlyCSEPass());
-  EarlyFPM.addPass(CoroEarlyPass());
-  if (Level == OptimizationLevel::O3)
-    EarlyFPM.addPass(CallSiteSplittingPass());
-
-  // In SamplePGO ThinLTO backend, we need instcombine before profile annotation
-  // to convert bitcast to direct calls so that they can be inlined during the
-  // profile annotation prepration step.
-  // More details about SamplePGO design can be found in:
-  // https://research.google.com/pubs/pub45290.html
-  // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
-  if (LoadSampleProfile)
-    EarlyFPM.addPass(InstCombinePass());
-  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
-
-  if (LoadSampleProfile) {
-    // Annotate sample profile right after early FPM to ensure freshness of
-    // the debug info.
-    MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
-                                        PGOOpt->ProfileRemappingFile, Phase));
-    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
-    // RequireAnalysisPass for PSI before subsequent non-module passes.
-    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
-    // Do not invoke ICP in the LTOPrelink phase as it makes it hard
-    // for the profile annotation to be accurate in the LTO backend.
-    if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink &&
-        Phase != ThinOrFullLTOPhase::FullLTOPreLink)
-      // We perform early indirect call promotion here, before globalopt.
-      // This is important for the ThinLTO backend phase because otherwise
-      // imported available_externally functions look unreferenced and are
-      // removed.
-      MPM.addPass(
-          PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
-  }
-
-  // Try to perform OpenMP specific optimizations on the module. This is a
-  // (quick!) no-op if there are no OpenMP runtime calls present in the module.
-  if (Level != OptimizationLevel::O0)
-    MPM.addPass(OpenMPOptPass());
-
-  if (AttributorRun & AttributorRunOption::MODULE)
-    MPM.addPass(AttributorPass());
-
-  // Lower type metadata and the type.test intrinsic in the ThinLTO
-  // post link pipeline after ICP. This is to enable usage of the type
-  // tests in ICP sequences.
-  if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
-    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
-
-  for (auto &C : PipelineEarlySimplificationEPCallbacks)
-    C(MPM, Level);
-
-  // Specialize functions with IPSCCP.
-  if (EnableFunctionSpecialization)
-    MPM.addPass(FunctionSpecializationPass());
-
-  // Interprocedural constant propagation now that basic cleanup has occurred
-  // and prior to optimizing globals.
-  // FIXME: This position in the pipeline hasn't been carefully considered in
-  // years, it should be re-analyzed.
-  MPM.addPass(IPSCCPPass());
-
-  // Attach metadata to indirect call sites indicating the set of functions
-  // they may target at run-time. This should follow IPSCCP.
-  MPM.addPass(CalledValuePropagationPass());
-
-  // Optimize globals to try and fold them into constants.
-  MPM.addPass(GlobalOptPass());
-
-  // Promote any localized globals to SSA registers.
-  // FIXME: Should this instead by a run of SROA?
-  // FIXME: We should probably run instcombine and simplifycfg afterward to
-  // delete control flows that are dead once globals have been folded to
-  // constants.
-  MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
-
-  // Remove any dead arguments exposed by cleanups and constant folding
-  // globals.
-  MPM.addPass(DeadArgumentEliminationPass());
-
-  // Create a small function pass pipeline to cleanup after all the global
-  // optimizations.
-  FunctionPassManager GlobalCleanupPM;
-  GlobalCleanupPM.addPass(InstCombinePass());
-  invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
-
-  GlobalCleanupPM.addPass(SimplifyCFGPass());
-  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM)));
-
-  // Add all the requested passes for instrumentation PGO, if requested.
-  if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
-      (PGOOpt->Action == PGOOptions::IRInstr ||
-       PGOOpt->Action == PGOOptions::IRUse)) {
-    addPGOInstrPasses(MPM, Level,
-                      /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
-                      /* IsCS */ false, PGOOpt->ProfileFile,
-                      PGOOpt->ProfileRemappingFile);
-    MPM.addPass(PGOIndirectCallPromotion(false, false));
-  }
-  if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
-      PGOOpt->CSAction == PGOOptions::CSIRInstr)
-    MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
-
-  // Synthesize function entry counts for non-PGO compilation.
-  if (EnableSyntheticCounts && !PGOOpt)
-    MPM.addPass(SyntheticCountsPropagation());
-
-  MPM.addPass(buildInlinerPipeline(Level, Phase));
-
-  if (EnableMemProfiler && Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
-    MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
-    MPM.addPass(ModuleMemProfilerPass());
-  }
-
-  return MPM;
-}
-
-/// TODO: Should LTO cause any 
diff erences to this set of passes?
-void PassBuilder::addVectorPasses(OptimizationLevel Level,
-                                  FunctionPassManager &FPM, bool IsFullLTO) {
-  FPM.addPass(LoopVectorizePass(
-      LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
-
-  if (IsFullLTO) {
-    // The vectorizer may have significantly shortened a loop body; unroll
-    // again. Unroll small loops to hide loop backedge latency and saturate any
-    // parallel execution resources of an out-of-order processor. We also then
-    // need to clean up redundancies and loop invariant code.
-    // FIXME: It would be really good to use a loop-integrated instruction
-    // combiner for cleanup here so that the unrolling and LICM can be pipelined
-    // across the loop nests.
-    // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
-    if (EnableUnrollAndJam && PTO.LoopUnrolling)
-      FPM.addPass(createFunctionToLoopPassAdaptor(
-          LoopUnrollAndJamPass(Level.getSpeedupLevel())));
-    FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
-        Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
-        PTO.ForgetAllSCEVInLoopUnroll)));
-    FPM.addPass(WarnMissedTransformationsPass());
-  }
-
-  if (!IsFullLTO) {
-    // Eliminate loads by forwarding stores from the previous iteration to loads
-    // of the current iteration.
-    FPM.addPass(LoopLoadEliminationPass());
-  }
-  // Cleanup after the loop optimization passes.
-  FPM.addPass(InstCombinePass());
-
-  if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
-    // At higher optimization levels, try to clean up any runtime overlap and
-    // alignment checks inserted by the vectorizer. We want to track correlated
-    // runtime checks for two inner loops in the same outer loop, fold any
-    // common computations, hoist loop-invariant aspects out of any outer loop,
-    // and unswitch the runtime checks if possible. Once hoisted, we may have
-    // dead (or speculatable) control flows or more combining opportunities.
-    FPM.addPass(EarlyCSEPass());
-    FPM.addPass(CorrelatedValuePropagationPass());
-    FPM.addPass(InstCombinePass());
-    LoopPassManager LPM;
-    LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
-    LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
-                                       OptimizationLevel::O3));
-    FPM.addPass(
-        RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
-    FPM.addPass(
-        createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
-                                        /*UseBlockFrequencyInfo=*/true));
-    FPM.addPass(SimplifyCFGPass());
-    FPM.addPass(InstCombinePass());
-  }
-
-  // Now that we've formed fast to execute loop structures, we do further
-  // optimizations. These are run afterward as they might block doing complex
-  // analyses and transforms such as what are needed for loop vectorization.
-
-  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
-  // GVN, loop transforms, and others have already run, so it's now better to
-  // convert to more optimized IR using more aggressive simplify CFG options.
-  // The extra sinking transform can create larger basic blocks, so do this
-  // before SLP vectorization.
-  FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
-                                  .forwardSwitchCondToPhi(true)
-                                  .convertSwitchToLookupTable(true)
-                                  .needCanonicalLoops(false)
-                                  .hoistCommonInsts(true)
-                                  .sinkCommonInsts(true)));
-
-  if (IsFullLTO) {
-    FPM.addPass(SCCPPass());
-    FPM.addPass(InstCombinePass());
-    FPM.addPass(BDCEPass());
-  }
-
-  // Optimize parallel scalar instruction chains into SIMD instructions.
-  if (PTO.SLPVectorization) {
-    FPM.addPass(SLPVectorizerPass());
-    if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
-      FPM.addPass(EarlyCSEPass());
-    }
-  }
-  // Enhance/cleanup vector code.
-  FPM.addPass(VectorCombinePass());
-
-  if (!IsFullLTO) {
-    FPM.addPass(InstCombinePass());
-    // Unroll small loops to hide loop backedge latency and saturate any
-    // parallel execution resources of an out-of-order processor. We also then
-    // need to clean up redundancies and loop invariant code.
-    // FIXME: It would be really good to use a loop-integrated instruction
-    // combiner for cleanup here so that the unrolling and LICM can be pipelined
-    // across the loop nests.
-    // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
-    if (EnableUnrollAndJam && PTO.LoopUnrolling) {
-      FPM.addPass(createFunctionToLoopPassAdaptor(
-          LoopUnrollAndJamPass(Level.getSpeedupLevel())));
-    }
-    FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
-        Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
-        PTO.ForgetAllSCEVInLoopUnroll)));
-    FPM.addPass(WarnMissedTransformationsPass());
-    FPM.addPass(InstCombinePass());
-    FPM.addPass(
-        RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
-    FPM.addPass(createFunctionToLoopPassAdaptor(
-        LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
-        /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
-  }
-
-  // Now that we've vectorized and unrolled loops, we may have more refined
-  // alignment information, try to re-derive it here.
-  FPM.addPass(AlignmentFromAssumptionsPass());
-
-  if (IsFullLTO)
-    FPM.addPass(InstCombinePass());
-}
-
-ModulePassManager
-PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
-                                             bool LTOPreLink) {
-  ModulePassManager MPM;
-
-  // Optimize globals now that the module is fully simplified.
-  MPM.addPass(GlobalOptPass());
-  MPM.addPass(GlobalDCEPass());
-
-  // Run partial inlining pass to partially inline functions that have
-  // large bodies.
-  if (RunPartialInlining)
-    MPM.addPass(PartialInlinerPass());
-
-  // Remove avail extern fns and globals definitions since we aren't compiling
-  // an object file for later LTO. For LTO we want to preserve these so they
-  // are eligible for inlining at link-time. Note if they are unreferenced they
-  // will be removed by GlobalDCE later, so this only impacts referenced
-  // available externally globals. Eventually they will be suppressed during
-  // codegen, but eliminating here enables more opportunity for GlobalDCE as it
-  // may make globals referenced by available external functions dead and saves
-  // running remaining passes on the eliminated functions. These should be
-  // preserved during prelinking for link-time inlining decisions.
-  if (!LTOPreLink)
-    MPM.addPass(EliminateAvailableExternallyPass());
-
-  if (EnableOrderFileInstrumentation)
-    MPM.addPass(InstrOrderFilePass());
-
-  // Do RPO function attribute inference across the module to forward-propagate
-  // attributes where applicable.
-  // FIXME: Is this really an optimization rather than a canonicalization?
-  MPM.addPass(ReversePostOrderFunctionAttrsPass());
-
-  // Do a post inline PGO instrumentation and use pass. This is a context
-  // sensitive PGO pass. We don't want to do this in LTOPreLink phrase as
-  // cross-module inline has not been done yet. The context sensitive
-  // instrumentation is after all the inlines are done.
-  if (!LTOPreLink && PGOOpt) {
-    if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
-      addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
-                        /* IsCS */ true, PGOOpt->CSProfileGenFile,
-                        PGOOpt->ProfileRemappingFile);
-    else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
-      addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
-                        /* IsCS */ true, PGOOpt->ProfileFile,
-                        PGOOpt->ProfileRemappingFile);
-  }
-
-  // Re-require GloblasAA here prior to function passes. This is particularly
-  // useful as the above will have inlined, DCE'ed, and function-attr
-  // propagated everything. We should at this point have a reasonably minimal
-  // and richly annotated call graph. By computing aliasing and mod/ref
-  // information for all local globals here, the late loop passes and notably
-  // the vectorizer will be able to use them to help recognize vectorizable
-  // memory operations.
-  MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
-
-  FunctionPassManager OptimizePM;
-  OptimizePM.addPass(Float2IntPass());
-  OptimizePM.addPass(LowerConstantIntrinsicsPass());
-
-  if (EnableMatrix) {
-    OptimizePM.addPass(LowerMatrixIntrinsicsPass());
-    OptimizePM.addPass(EarlyCSEPass());
-  }
-
-  // FIXME: We need to run some loop optimizations to re-rotate loops after
-  // simplifycfg and others undo their rotation.
-
-  // Optimize the loop execution. These passes operate on entire loop nests
-  // rather than on each loop in an inside-out manner, and so they are actually
-  // function passes.
-
-  for (auto &C : VectorizerStartEPCallbacks)
-    C(OptimizePM, Level);
-
-  // First rotate loops that may have been un-rotated by prior passes.
-  // Disable header duplication at -Oz.
-  OptimizePM.addPass(createFunctionToLoopPassAdaptor(
-      LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink),
-      /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
-
-  // Distribute loops to allow partial vectorization.  I.e. isolate dependences
-  // into separate loop that would otherwise inhibit vectorization.  This is
-  // currently only performed for loops marked with the metadata
-  // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
-  OptimizePM.addPass(LoopDistributePass());
-
-  // Populates the VFABI attribute with the scalar-to-vector mappings
-  // from the TargetLibraryInfo.
-  OptimizePM.addPass(InjectTLIMappings());
-
-  addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
-
-  // Split out cold code. Splitting is done late to avoid hiding context from
-  // other optimizations and inadvertently regressing performance. The tradeoff
-  // is that this has a higher code size cost than splitting early.
-  if (EnableHotColdSplit && !LTOPreLink)
-    MPM.addPass(HotColdSplittingPass());
-
-  // Search the code for similar regions of code. If enough similar regions can
-  // be found where extracting the regions into their own function will decrease
-  // the size of the program, we extract the regions, a deduplicate the
-  // structurally similar regions.
-  if (EnableIROutliner)
-    MPM.addPass(IROutlinerPass());
-
-  // Merge functions if requested.
-  if (PTO.MergeFunctions)
-    MPM.addPass(MergeFunctionsPass());
-
-  // LoopSink pass sinks instructions hoisted by LICM, which serves as a
-  // canonicalization pass that enables other optimizations. As a result,
-  // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
-  // result too early.
-  OptimizePM.addPass(LoopSinkPass());
-
-  // And finally clean up LCSSA form before generating code.
-  OptimizePM.addPass(InstSimplifyPass());
-
-  // This hoists/decomposes div/rem ops. It should run after other sink/hoist
-  // passes to avoid re-sinking, but before SimplifyCFG because it can allow
-  // flattening of blocks.
-  OptimizePM.addPass(DivRemPairsPass());
-
-  // LoopSink (and other loop passes since the last simplifyCFG) might have
-  // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
-  OptimizePM.addPass(SimplifyCFGPass());
-
-  OptimizePM.addPass(CoroCleanupPass());
-
-  // Add the core optimizing pipeline.
-  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM)));
-
-  for (auto &C : OptimizerLastEPCallbacks)
-    C(MPM, Level);
-
-  if (PTO.CallGraphProfile)
-    MPM.addPass(CGProfilePass());
-
-  // Now we need to do some global optimization transforms.
-  // FIXME: It would seem like these should come first in the optimization
-  // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
-  // ordering here.
-  MPM.addPass(GlobalDCEPass());
-  MPM.addPass(ConstantMergePass());
-
-  // TODO: Relative look table converter pass caused an issue when full lto is
-  // enabled. See https://reviews.llvm.org/D94355 for more details.
-  // Until the issue fixed, disable this pass during pre-linking phase.
-  if (!LTOPreLink)
-    MPM.addPass(RelLookupTableConverterPass());
-
-  return MPM;
-}
-
-ModulePassManager
-PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
-                                           bool LTOPreLink) {
-  assert(Level != OptimizationLevel::O0 &&
-         "Must request optimizations for the default pipeline!");
-
-  ModulePassManager MPM;
-
-  // Convert @llvm.global.annotations to !annotation metadata.
-  MPM.addPass(Annotation2MetadataPass());
-
-  // Force any function attributes we want the rest of the pipeline to observe.
-  MPM.addPass(ForceFunctionAttrsPass());
-
-  // Apply module pipeline start EP callback.
-  for (auto &C : PipelineStartEPCallbacks)
-    C(MPM, Level);
-
-  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
-    MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
-
-  // Add the core simplification pipeline.
-  MPM.addPass(buildModuleSimplificationPipeline(
-      Level, LTOPreLink ? ThinOrFullLTOPhase::FullLTOPreLink
-                        : ThinOrFullLTOPhase::None));
-
-  // Now add the optimization pipeline.
-  MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink));
-
-  if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
-    MPM.addPass(PseudoProbeUpdatePass());
-
-  // Emit annotation remarks.
-  addAnnotationRemarksPass(MPM);
-
-  if (LTOPreLink)
-    addRequiredLTOPreLinkPasses(MPM);
-
-  return MPM;
-}
-
-ModulePassManager
-PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
-  assert(Level != OptimizationLevel::O0 &&
-         "Must request optimizations for the default pipeline!");
-
-  ModulePassManager MPM;
-
-  // Convert @llvm.global.annotations to !annotation metadata.
-  MPM.addPass(Annotation2MetadataPass());
-
-  // Force any function attributes we want the rest of the pipeline to observe.
-  MPM.addPass(ForceFunctionAttrsPass());
-
-  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
-    MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
-
-  // Apply module pipeline start EP callback.
-  for (auto &C : PipelineStartEPCallbacks)
-    C(MPM, Level);
-
-  // If we are planning to perform ThinLTO later, we don't bloat the code with
-  // unrolling/vectorization/... now. Just simplify the module as much as we
-  // can.
-  MPM.addPass(buildModuleSimplificationPipeline(
-      Level, ThinOrFullLTOPhase::ThinLTOPreLink));
-
-  // Run partial inlining pass to partially inline functions that have
-  // large bodies.
-  // FIXME: It isn't clear whether this is really the right place to run this
-  // in ThinLTO. Because there is another canonicalization and simplification
-  // phase that will run after the thin link, running this here ends up with
-  // less information than will be available later and it may grow functions in
-  // ways that aren't beneficial.
-  if (RunPartialInlining)
-    MPM.addPass(PartialInlinerPass());
-
-  // Reduce the size of the IR as much as possible.
-  MPM.addPass(GlobalOptPass());
-
-  // Module simplification splits coroutines, but does not fully clean up
-  // coroutine intrinsics. To ensure ThinLTO optimization passes don't trip up
-  // on these, we schedule the cleanup here.
-  MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
-
-  if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
-    MPM.addPass(PseudoProbeUpdatePass());
-
-  // Handle OptimizerLastEPCallbacks added by clang on PreLink. Actual
-  // optimization is going to be done in PostLink stage, but clang can't
-  // add callbacks there in case of in-process ThinLTO called by linker.
-  for (auto &C : OptimizerLastEPCallbacks)
-    C(MPM, Level);
-
-  // Emit annotation remarks.
-  addAnnotationRemarksPass(MPM);
-
-  addRequiredLTOPreLinkPasses(MPM);
-
-  return MPM;
-}
-
-ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
-    OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
-  ModulePassManager MPM;
-
-  // Convert @llvm.global.annotations to !annotation metadata.
-  MPM.addPass(Annotation2MetadataPass());
-
-  if (ImportSummary) {
-    // These passes import type identifier resolutions for whole-program
-    // devirtualization and CFI. They must run early because other passes may
-    // disturb the specific instruction patterns that these passes look for,
-    // creating dependencies on resolutions that may not appear in the summary.
-    //
-    // For example, GVN may transform the pattern assume(type.test) appearing in
-    // two basic blocks into assume(phi(type.test, type.test)), which would
-    // transform a dependency on a WPD resolution into a dependency on a type
-    // identifier resolution for CFI.
-    //
-    // Also, WPD has access to more precise information than ICP and can
-    // devirtualize more effectively, so it should operate on the IR first.
-    //
-    // The WPD and LowerTypeTest passes need to run at -O0 to lower type
-    // metadata and intrinsics.
-    MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
-    MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
-  }
-
-  if (Level == OptimizationLevel::O0) {
-    // Run a second time to clean up any type tests left behind by WPD for use
-    // in ICP.
-    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
-    // Drop available_externally and unreferenced globals. This is necessary
-    // with ThinLTO in order to avoid leaving undefined references to dead
-    // globals in the object file.
-    MPM.addPass(EliminateAvailableExternallyPass());
-    MPM.addPass(GlobalDCEPass());
-    return MPM;
-  }
-
-  // Force any function attributes we want the rest of the pipeline to observe.
-  MPM.addPass(ForceFunctionAttrsPass());
-
-  // Add the core simplification pipeline.
-  MPM.addPass(buildModuleSimplificationPipeline(
-      Level, ThinOrFullLTOPhase::ThinLTOPostLink));
-
-  // Now add the optimization pipeline.
-  MPM.addPass(buildModuleOptimizationPipeline(Level));
-
-  // Emit annotation remarks.
-  addAnnotationRemarksPass(MPM);
-
-  return MPM;
-}
-
-ModulePassManager
-PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
-  assert(Level != OptimizationLevel::O0 &&
-         "Must request optimizations for the default pipeline!");
-  // FIXME: We should use a customized pre-link pipeline!
-  return buildPerModuleDefaultPipeline(Level,
-                                       /* LTOPreLink */ true);
-}
-
-ModulePassManager
-PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
-                                     ModuleSummaryIndex *ExportSummary) {
-  ModulePassManager MPM;
-
-  // Convert @llvm.global.annotations to !annotation metadata.
-  MPM.addPass(Annotation2MetadataPass());
-
-  // Create a function that performs CFI checks for cross-DSO calls with targets
-  // in the current module.
-  MPM.addPass(CrossDSOCFIPass());
-
-  if (Level == OptimizationLevel::O0) {
-    // The WPD and LowerTypeTest passes need to run at -O0 to lower type
-    // metadata and intrinsics.
-    MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
-    MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
-    // Run a second time to clean up any type tests left behind by WPD for use
-    // in ICP.
-    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
-
-    // Emit annotation remarks.
-    addAnnotationRemarksPass(MPM);
-
-    return MPM;
-  }
-
-  if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
-    // Load sample profile before running the LTO optimization pipeline.
-    MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
-                                        PGOOpt->ProfileRemappingFile,
-                                        ThinOrFullLTOPhase::FullLTOPostLink));
-    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
-    // RequireAnalysisPass for PSI before subsequent non-module passes.
-    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
-  }
-
-  // Remove unused virtual tables to improve the quality of code generated by
-  // whole-program devirtualization and bitset lowering.
-  MPM.addPass(GlobalDCEPass());
-
-  // Force any function attributes we want the rest of the pipeline to observe.
-  MPM.addPass(ForceFunctionAttrsPass());
-
-  // Do basic inference of function attributes from known properties of system
-  // libraries and other oracles.
-  MPM.addPass(InferFunctionAttrsPass());
-
-  if (Level.getSpeedupLevel() > 1) {
-    FunctionPassManager EarlyFPM;
-    EarlyFPM.addPass(CallSiteSplittingPass());
-    MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
-
-    // Indirect call promotion. This should promote all the targets that are
-    // left by the earlier promotion pass that promotes intra-module targets.
-    // This two-step promotion is to save the compile time. For LTO, it should
-    // produce the same result as if we only do promotion here.
-    MPM.addPass(PGOIndirectCallPromotion(
-        true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
-
-    if (EnableFunctionSpecialization)
-      MPM.addPass(FunctionSpecializationPass());
-    // Propagate constants at call sites into the functions they call.  This
-    // opens opportunities for globalopt (and inlining) by substituting function
-    // pointers passed as arguments to direct uses of functions.
-    MPM.addPass(IPSCCPPass());
-
-    // Attach metadata to indirect call sites indicating the set of functions
-    // they may target at run-time. This should follow IPSCCP.
-    MPM.addPass(CalledValuePropagationPass());
-  }
-
-  // Now deduce any function attributes based in the current code.
-  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
-              PostOrderFunctionAttrsPass()));
-
-  // Do RPO function attribute inference across the module to forward-propagate
-  // attributes where applicable.
-  // FIXME: Is this really an optimization rather than a canonicalization?
-  MPM.addPass(ReversePostOrderFunctionAttrsPass());
-
-  // Use in-range annotations on GEP indices to split globals where beneficial.
-  MPM.addPass(GlobalSplitPass());
-
-  // Run whole program optimization of virtual call when the list of callees
-  // is fixed.
-  MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
-
-  // Stop here at -O1.
-  if (Level == OptimizationLevel::O1) {
-    // The LowerTypeTestsPass needs to run to lower type metadata and the
-    // type.test intrinsics. The pass does nothing if CFI is disabled.
-    MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
-    // Run a second time to clean up any type tests left behind by WPD for use
-    // in ICP (which is performed earlier than this in the regular LTO
-    // pipeline).
-    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
-
-    // Emit annotation remarks.
-    addAnnotationRemarksPass(MPM);
-
-    return MPM;
-  }
-
-  // Optimize globals to try and fold them into constants.
-  MPM.addPass(GlobalOptPass());
-
-  // Promote any localized globals to SSA registers.
-  MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
-
-  // Linking modules together can lead to duplicate global constant, only
-  // keep one copy of each constant.
-  MPM.addPass(ConstantMergePass());
-
-  // Remove unused arguments from functions.
-  MPM.addPass(DeadArgumentEliminationPass());
-
-  // Reduce the code after globalopt and ipsccp.  Both can open up significant
-  // simplification opportunities, and both can propagate functions through
-  // function pointers.  When this happens, we often have to resolve varargs
-  // calls, etc, so let instcombine do this.
-  FunctionPassManager PeepholeFPM;
-  if (Level == OptimizationLevel::O3)
-    PeepholeFPM.addPass(AggressiveInstCombinePass());
-  PeepholeFPM.addPass(InstCombinePass());
-  invokePeepholeEPCallbacks(PeepholeFPM, Level);
-
-  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM)));
-
-  // Note: historically, the PruneEH pass was run first to deduce nounwind and
-  // generally clean up exception handling overhead. It isn't clear this is
-  // valuable as the inliner doesn't currently care whether it is inlining an
-  // invoke or a call.
-  // Run the inliner now.
-  MPM.addPass(ModuleInlinerWrapperPass(getInlineParamsFromOptLevel(Level)));
-
-  // Optimize globals again after we ran the inliner.
-  MPM.addPass(GlobalOptPass());
-
-  // Garbage collect dead functions.
-  MPM.addPass(GlobalDCEPass());
-
-  // If we didn't decide to inline a function, check to see if we can
-  // transform it to pass arguments by value instead of by reference.
-  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass()));
-
-  FunctionPassManager FPM;
-  // The IPO Passes may leave cruft around. Clean up after them.
-  FPM.addPass(InstCombinePass());
-  invokePeepholeEPCallbacks(FPM, Level);
-
-  FPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
-
-  // Do a post inline PGO instrumentation and use pass. This is a context
-  // sensitive PGO pass.
-  if (PGOOpt) {
-    if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
-      addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
-                        /* IsCS */ true, PGOOpt->CSProfileGenFile,
-                        PGOOpt->ProfileRemappingFile);
-    else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
-      addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
-                        /* IsCS */ true, PGOOpt->ProfileFile,
-                        PGOOpt->ProfileRemappingFile);
-  }
-
-  // Break up allocas
-  FPM.addPass(SROA());
-
-  // LTO provides additional opportunities for tailcall elimination due to
-  // link-time inlining, and visibility of nocapture attribute.
-  FPM.addPass(TailCallElimPass());
-
-  // Run a few AA driver optimizations here and now to cleanup the code.
-  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
-
-  MPM.addPass(
-      createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
-
-  // Require the GlobalsAA analysis for the module so we can query it within
-  // MainFPM.
-  MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
-  // Invalidate AAManager so it can be recreated and pick up the newly available
-  // GlobalsAA.
-  MPM.addPass(
-      createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
-
-  FunctionPassManager MainFPM;
-  MainFPM.addPass(createFunctionToLoopPassAdaptor(
-      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
-      /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
-
-  if (RunNewGVN)
-    MainFPM.addPass(NewGVNPass());
-  else
-    MainFPM.addPass(GVN());
-
-  // Remove dead memcpy()'s.
-  MainFPM.addPass(MemCpyOptPass());
-
-  // Nuke dead stores.
-  MainFPM.addPass(DSEPass());
-  MainFPM.addPass(MergedLoadStoreMotionPass());
-
-  // More loops are countable; try to optimize them.
-  if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
-    MainFPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
-
-  if (EnableConstraintElimination)
-    MainFPM.addPass(ConstraintEliminationPass());
-
-  LoopPassManager LPM;
-  LPM.addPass(IndVarSimplifyPass());
-  LPM.addPass(LoopDeletionPass());
-  // FIXME: Add loop interchange.
-
-  // Unroll small loops and perform peeling.
-  LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
-                                 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
-                                 PTO.ForgetAllSCEVInLoopUnroll));
-  // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
-  // *All* loop passes must preserve it, in order to be able to use it.
-  MainFPM.addPass(createFunctionToLoopPassAdaptor(
-      std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
-
-  MainFPM.addPass(LoopDistributePass());
-
-  addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
-
-  invokePeepholeEPCallbacks(MainFPM, Level);
-  MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
-  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM)));
-
-  // Lower type metadata and the type.test intrinsic. This pass supports
-  // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
-  // to be run at link time if CFI is enabled. This pass does nothing if
-  // CFI is disabled.
-  MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
-  // Run a second time to clean up any type tests left behind by WPD for use
-  // in ICP (which is performed earlier than this in the regular LTO pipeline).
-  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
-
-  // Enable splitting late in the FullLTO post-link pipeline. This is done in
-  // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses).
-  if (EnableHotColdSplit)
-    MPM.addPass(HotColdSplittingPass());
-
-  // Add late LTO optimization passes.
-  // Delete basic blocks, which optimization passes may have killed.
-  MPM.addPass(createModuleToFunctionPassAdaptor(
-      SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true))));
-
-  // Drop bodies of available eternally objects to improve GlobalDCE.
-  MPM.addPass(EliminateAvailableExternallyPass());
-
-  // Now that we have optimized the program, discard unreachable functions.
-  MPM.addPass(GlobalDCEPass());
-
-  if (PTO.MergeFunctions)
-    MPM.addPass(MergeFunctionsPass());
-
-  // Emit annotation remarks.
-  addAnnotationRemarksPass(MPM);
-
-  return MPM;
-}
-
-ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
-                                                      bool LTOPreLink) {
-  assert(Level == OptimizationLevel::O0 &&
-         "buildO0DefaultPipeline should only be used with O0");
-
-  ModulePassManager MPM;
-
-  // Perform pseudo probe instrumentation in O0 mode. This is for the
-  // consistency between 
diff erent build modes. For example, a LTO build can be
-  // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
-  // the postlink will require pseudo probe instrumentation in the prelink.
-  if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
-    MPM.addPass(SampleProfileProbePass(TM));
-
-  if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
-                 PGOOpt->Action == PGOOptions::IRUse))
-    addPGOInstrPassesForO0(
-        MPM,
-        /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
-        /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
-
-  for (auto &C : PipelineStartEPCallbacks)
-    C(MPM, Level);
-
-  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
-    MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
-
-  for (auto &C : PipelineEarlySimplificationEPCallbacks)
-    C(MPM, Level);
-
-  // Build a minimal pipeline based on the semantics required by LLVM,
-  // which is just that always inlining occurs. Further, disable generating
-  // lifetime intrinsics to avoid enabling further optimizations during
-  // code generation.
-  MPM.addPass(AlwaysInlinerPass(
-      /*InsertLifetimeIntrinsics=*/false));
-
-  if (PTO.MergeFunctions)
-    MPM.addPass(MergeFunctionsPass());
-
-  if (EnableMatrix)
-    MPM.addPass(
-        createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));
-
-  if (!CGSCCOptimizerLateEPCallbacks.empty()) {
-    CGSCCPassManager CGPM;
-    for (auto &C : CGSCCOptimizerLateEPCallbacks)
-      C(CGPM, Level);
-    if (!CGPM.isEmpty())
-      MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
-  }
-  if (!LateLoopOptimizationsEPCallbacks.empty()) {
-    LoopPassManager LPM;
-    for (auto &C : LateLoopOptimizationsEPCallbacks)
-      C(LPM, Level);
-    if (!LPM.isEmpty()) {
-      MPM.addPass(createModuleToFunctionPassAdaptor(
-          createFunctionToLoopPassAdaptor(std::move(LPM))));
-    }
-  }
-  if (!LoopOptimizerEndEPCallbacks.empty()) {
-    LoopPassManager LPM;
-    for (auto &C : LoopOptimizerEndEPCallbacks)
-      C(LPM, Level);
-    if (!LPM.isEmpty()) {
-      MPM.addPass(createModuleToFunctionPassAdaptor(
-          createFunctionToLoopPassAdaptor(std::move(LPM))));
-    }
-  }
-  if (!ScalarOptimizerLateEPCallbacks.empty()) {
-    FunctionPassManager FPM;
-    for (auto &C : ScalarOptimizerLateEPCallbacks)
-      C(FPM, Level);
-    if (!FPM.isEmpty())
-      MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
-  }
-  if (!VectorizerStartEPCallbacks.empty()) {
-    FunctionPassManager FPM;
-    for (auto &C : VectorizerStartEPCallbacks)
-      C(FPM, Level);
-    if (!FPM.isEmpty())
-      MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
-  }
-
-  MPM.addPass(createModuleToFunctionPassAdaptor(CoroEarlyPass()));
-  CGSCCPassManager CGPM;
-  CGPM.addPass(CoroSplitPass());
-  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
-  MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
-
-  for (auto &C : OptimizerLastEPCallbacks)
-    C(MPM, Level);
-
-  if (LTOPreLink)
-    addRequiredLTOPreLinkPasses(MPM);
-
-  return MPM;
-}
-
-AAManager PassBuilder::buildDefaultAAPipeline() {
-  AAManager AA;
-
-  // The order in which these are registered determines their priority when
-  // being queried.
-
-  // First we register the basic alias analysis that provides the majority of
-  // per-function local AA logic. This is a stateless, on-demand local set of
-  // AA techniques.
-  AA.registerFunctionAnalysis<BasicAA>();
-
-  // Next we query fast, specialized alias analyses that wrap IR-embedded
-  // information about aliasing.
-  AA.registerFunctionAnalysis<ScopedNoAliasAA>();
-  AA.registerFunctionAnalysis<TypeBasedAA>();
-
-  // Add support for querying global aliasing information when available.
-  // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
-  // analysis, all that the `AAManager` can do is query for any *cached*
-  // results from `GlobalsAA` through a readonly proxy.
-  AA.registerModuleAnalysis<GlobalsAA>();
-
-  // Add target-specific alias analyses.
-  if (TM)
-    TM->registerDefaultAliasAnalyses(AA);
-
-  return AA;
-}
-
 static Optional<int> parseRepeatPassName(StringRef Name) {
   if (!Name.consume_front("repeat<") || !Name.consume_back(">"))
     return None;

diff  --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
new file mode 100644
index 0000000000000..3cde658cf2c41
--- /dev/null
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -0,0 +1,1730 @@
+//===- Construction of pass pipelines -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file provides the implementation of the PassBuilder based on our
+/// static pass registry as well as related functionality. It also provides
+/// helpers to aid in analyzing, debugging, and testing passes and pass
+/// pipelines.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InlineAdvisor.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Passes/OptimizationLevel.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/PGOOptions.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
+#include "llvm/Transforms/Coroutines/CoroCleanup.h"
+#include "llvm/Transforms/Coroutines/CoroEarly.h"
+#include "llvm/Transforms/Coroutines/CoroElide.h"
+#include "llvm/Transforms/Coroutines/CoroSplit.h"
+#include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/Annotation2Metadata.h"
+#include "llvm/Transforms/IPO/ArgumentPromotion.h"
+#include "llvm/Transforms/IPO/Attributor.h"
+#include "llvm/Transforms/IPO/CalledValuePropagation.h"
+#include "llvm/Transforms/IPO/ConstantMerge.h"
+#include "llvm/Transforms/IPO/CrossDSOCFI.h"
+#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
+#include "llvm/Transforms/IPO/ElimAvailExtern.h"
+#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
+#include "llvm/Transforms/IPO/FunctionAttrs.h"
+#include "llvm/Transforms/IPO/GlobalDCE.h"
+#include "llvm/Transforms/IPO/GlobalOpt.h"
+#include "llvm/Transforms/IPO/GlobalSplit.h"
+#include "llvm/Transforms/IPO/HotColdSplitting.h"
+#include "llvm/Transforms/IPO/IROutliner.h"
+#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
+#include "llvm/Transforms/IPO/Inliner.h"
+#include "llvm/Transforms/IPO/LowerTypeTests.h"
+#include "llvm/Transforms/IPO/MergeFunctions.h"
+#include "llvm/Transforms/IPO/OpenMPOpt.h"
+#include "llvm/Transforms/IPO/PartialInlining.h"
+#include "llvm/Transforms/IPO/SCCP.h"
+#include "llvm/Transforms/IPO/SampleProfile.h"
+#include "llvm/Transforms/IPO/SampleProfileProbe.h"
+#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
+#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
+#include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "llvm/Transforms/Instrumentation/CGProfile.h"
+#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
+#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
+#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
+#include "llvm/Transforms/Instrumentation/MemProfiler.h"
+#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
+#include "llvm/Transforms/Scalar/ADCE.h"
+#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
+#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
+#include "llvm/Transforms/Scalar/BDCE.h"
+#include "llvm/Transforms/Scalar/CallSiteSplitting.h"
+#include "llvm/Transforms/Scalar/ConstraintElimination.h"
+#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
+#include "llvm/Transforms/Scalar/DFAJumpThreading.h"
+#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
+#include "llvm/Transforms/Scalar/DivRemPairs.h"
+#include "llvm/Transforms/Scalar/EarlyCSE.h"
+#include "llvm/Transforms/Scalar/Float2Int.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/IndVarSimplify.h"
+#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
+#include "llvm/Transforms/Scalar/JumpThreading.h"
+#include "llvm/Transforms/Scalar/LICM.h"
+#include "llvm/Transforms/Scalar/LoopDeletion.h"
+#include "llvm/Transforms/Scalar/LoopDistribute.h"
+#include "llvm/Transforms/Scalar/LoopFlatten.h"
+#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
+#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
+#include "llvm/Transforms/Scalar/LoopInterchange.h"
+#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Transforms/Scalar/LoopRotation.h"
+#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
+#include "llvm/Transforms/Scalar/LoopSink.h"
+#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
+#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
+#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
+#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
+#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
+#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
+#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
+#include "llvm/Transforms/Scalar/NewGVN.h"
+#include "llvm/Transforms/Scalar/Reassociate.h"
+#include "llvm/Transforms/Scalar/SCCP.h"
+#include "llvm/Transforms/Scalar/SROA.h"
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
+#include "llvm/Transforms/Scalar/SimplifyCFG.h"
+#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
+#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
+#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
+#include "llvm/Transforms/Utils/AddDiscriminators.h"
+#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
+#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
+#include "llvm/Transforms/Utils/InjectTLIMappings.h"
+#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
+#include "llvm/Transforms/Utils/Mem2Reg.h"
+#include "llvm/Transforms/Utils/NameAnonGlobals.h"
+#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
+#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
+#include "llvm/Transforms/Vectorize/LoopVectorize.h"
+#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
+#include "llvm/Transforms/Vectorize/VectorCombine.h"
+
+using namespace llvm;
+
+static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
+    "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
+    cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
+    cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
+                          "Heuristics-based inliner version."),
+               clEnumValN(InliningAdvisorMode::Development, "development",
+                          "Use development mode (runtime-loadable model)."),
+               clEnumValN(InliningAdvisorMode::Release, "release",
+                          "Use release mode (AOT-compiled model).")));
+
+static cl::opt<bool> EnableSyntheticCounts(
+    "enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore,
+    cl::desc("Run synthetic function entry count generation "
+             "pass"));
+
+/// Flag to enable inline deferral during PGO.
+static cl::opt<bool>
+    EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
+                            cl::Hidden,
+                            cl::desc("Enable inline deferral during PGO"));
+
+static cl::opt<bool> EnableMemProfiler("enable-mem-prof", cl::init(false),
+                                       cl::Hidden, cl::ZeroOrMore,
+                                       cl::desc("Enable memory profiler"));
+
+static cl::opt<bool> PerformMandatoryInliningsFirst(
+    "mandatory-inlining-first", cl::init(true), cl::Hidden, cl::ZeroOrMore,
+    cl::desc("Perform mandatory inlinings module-wide, before performing "
+             "inlining."));
+
+static cl::opt<bool> EnableO3NonTrivialUnswitching(
+    "enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden,
+    cl::ZeroOrMore, cl::desc("Enable non-trivial loop unswitching for -O3"));
+
+PipelineTuningOptions::PipelineTuningOptions() {
+  LoopInterleaving = true;
+  LoopVectorization = true;
+  SLPVectorization = false;
+  LoopUnrolling = true;
+  ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
+  LicmMssaOptCap = SetLicmMssaOptCap;
+  LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
+  CallGraphProfile = true;
+  MergeFunctions = false;
+}
+
+namespace llvm {
+
+extern cl::opt<unsigned> MaxDevirtIterations;
+extern cl::opt<bool> EnableConstraintElimination;
+extern cl::opt<bool> EnableFunctionSpecialization;
+extern cl::opt<bool> EnableGVNHoist;
+extern cl::opt<bool> EnableGVNSink;
+extern cl::opt<bool> EnableHotColdSplit;
+extern cl::opt<bool> EnableIROutliner;
+extern cl::opt<bool> EnableOrderFileInstrumentation;
+extern cl::opt<bool> EnableCHR;
+extern cl::opt<bool> EnableLoopInterchange;
+extern cl::opt<bool> EnableUnrollAndJam;
+extern cl::opt<bool> EnableLoopFlatten;
+extern cl::opt<bool> EnableDFAJumpThreading;
+extern cl::opt<bool> RunNewGVN;
+extern cl::opt<bool> RunPartialInlining;
+extern cl::opt<bool> ExtraVectorizerPasses;
+
+extern cl::opt<bool> FlattenedProfileUsed;
+
+extern cl::opt<AttributorRunOption> AttributorRun;
+extern cl::opt<bool> EnableKnowledgeRetention;
+
+extern cl::opt<bool> EnableMatrix;
+
+extern cl::opt<bool> DisablePreInliner;
+extern cl::opt<int> PreInlineThreshold;
+} // namespace llvm
+
+void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
+                                            OptimizationLevel Level) {
+  for (auto &C : PeepholeEPCallbacks)
+    C(FPM, Level);
+}
+
+// Helper to add AnnotationRemarksPass.
+static void addAnnotationRemarksPass(ModulePassManager &MPM) {
+  FunctionPassManager FPM;
+  FPM.addPass(AnnotationRemarksPass());
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+}
+
+// Helper to check if the current compilation phase is preparing for LTO
+static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
+  return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
+         Phase == ThinOrFullLTOPhase::FullLTOPreLink;
+}
+
+// TODO: Investigate the cost/benefit of tail call elimination on debugging.
+FunctionPassManager
+PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
+                                                   ThinOrFullLTOPhase Phase) {
+
+  FunctionPassManager FPM;
+
+  // Form SSA out of local memory accesses after breaking apart aggregates into
+  // scalars.
+  FPM.addPass(SROA());
+
+  // Catch trivial redundancies
+  FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
+
+  // Hoisting of scalars and load expressions.
+  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(InstCombinePass());
+
+  FPM.addPass(LibCallsShrinkWrapPass());
+
+  invokePeepholeEPCallbacks(FPM, Level);
+
+  FPM.addPass(SimplifyCFGPass());
+
+  // Form canonically associated expression trees, and simplify the trees using
+  // basic mathematical properties. For example, this will form (nearly)
+  // minimal multiplication trees.
+  FPM.addPass(ReassociatePass());
+
+  // Add the primary loop simplification pipeline.
+  // FIXME: Currently this is split into two loop pass pipelines because we run
+  // some function passes in between them. These can and should be removed
+  // and/or replaced by scheduling the loop pass equivalents in the correct
+  // positions. But those equivalent passes aren't powerful enough yet.
+  // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
+  // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
+  // fully replace `SimplifyCFGPass`, and the closest to the other we have is
+  // `LoopInstSimplify`.
+  LoopPassManager LPM1, LPM2;
+
+  // Simplify the loop body. We do this initially to clean up after other loop
+  // passes run, either when iterating on a loop or on inner loops with
+  // implications on the outer loop.
+  LPM1.addPass(LoopInstSimplifyPass());
+  LPM1.addPass(LoopSimplifyCFGPass());
+
+  // Try to remove as much code from the loop header as possible,
+  // to reduce amount of IR that will have to be duplicated.
+  // TODO: Investigate promotion cap for O1.
+  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+
+  LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
+                              isLTOPreLink(Phase)));
+  // TODO: Investigate promotion cap for O1.
+  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+  LPM1.addPass(SimpleLoopUnswitchPass());
+
+  LPM2.addPass(LoopIdiomRecognizePass());
+  LPM2.addPass(IndVarSimplifyPass());
+
+  for (auto &C : LateLoopOptimizationsEPCallbacks)
+    C(LPM2, Level);
+
+  LPM2.addPass(LoopDeletionPass());
+
+  if (EnableLoopInterchange)
+    LPM2.addPass(LoopInterchangePass());
+
+  // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
+  // because it changes IR to makes profile annotation in back compile
+  // inaccurate. The normal unroller doesn't pay attention to forced full unroll
+  // attributes so we need to make sure and allow the full unroll pass to pay
+  // attention to it.
+  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
+      PGOOpt->Action != PGOOptions::SampleUse)
+    LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
+                                    /* OnlyWhenForced= */ !PTO.LoopUnrolling,
+                                    PTO.ForgetAllSCEVInLoopUnroll));
+
+  for (auto &C : LoopOptimizerEndEPCallbacks)
+    C(LPM2, Level);
+
+  // We provide the opt remark emitter pass for LICM to use. We only need to do
+  // this once as it is immutable.
+  FPM.addPass(
+      RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
+                                              /*UseMemorySSA=*/true,
+                                              /*UseBlockFrequencyInfo=*/true));
+  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(InstCombinePass());
+  if (EnableLoopFlatten)
+    FPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
+  // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
+  // *All* loop passes must preserve it, in order to be able to use it.
+  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
+                                              /*UseMemorySSA=*/false,
+                                              /*UseBlockFrequencyInfo=*/false));
+
+  // Delete small array after loop unroll.
+  FPM.addPass(SROA());
+
+  // Specially optimize memory movement as it doesn't look like dataflow in SSA.
+  FPM.addPass(MemCpyOptPass());
+
+  // Sparse conditional constant propagation.
+  // FIXME: It isn't clear why we do this *after* loop passes rather than
+  // before...
+  FPM.addPass(SCCPPass());
+
+  // Delete dead bit computations (instcombine runs after to fold away the dead
+  // computations, and then ADCE will run later to exploit any new DCE
+  // opportunities that creates).
+  FPM.addPass(BDCEPass());
+
+  // Run instcombine after redundancy and dead bit elimination to exploit
+  // opportunities opened up by them.
+  FPM.addPass(InstCombinePass());
+  invokePeepholeEPCallbacks(FPM, Level);
+
+  FPM.addPass(CoroElidePass());
+
+  for (auto &C : ScalarOptimizerLateEPCallbacks)
+    C(FPM, Level);
+
+  // Finally, do an expensive DCE pass to catch all the dead code exposed by
+  // the simplifications and basic cleanup after all the simplifications.
+  // TODO: Investigate if this is too expensive.
+  FPM.addPass(ADCEPass());
+  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(InstCombinePass());
+  invokePeepholeEPCallbacks(FPM, Level);
+
+  return FPM;
+}
+
+FunctionPassManager
+PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
+                                                 ThinOrFullLTOPhase Phase) {
+  assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
+
+  // The O1 pipeline has a separate pipeline creation function to simplify
+  // construction readability.
+  if (Level.getSpeedupLevel() == 1)
+    return buildO1FunctionSimplificationPipeline(Level, Phase);
+
+  FunctionPassManager FPM;
+
+  // Form SSA out of local memory accesses after breaking apart aggregates into
+  // scalars.
+  FPM.addPass(SROA());
+
+  // Catch trivial redundancies
+  FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
+  if (EnableKnowledgeRetention)
+    FPM.addPass(AssumeSimplifyPass());
+
+  // Hoisting of scalars and load expressions.
+  if (EnableGVNHoist)
+    FPM.addPass(GVNHoistPass());
+
+  // Global value numbering based sinking.
+  if (EnableGVNSink) {
+    FPM.addPass(GVNSinkPass());
+    FPM.addPass(SimplifyCFGPass());
+  }
+
+  if (EnableConstraintElimination)
+    FPM.addPass(ConstraintEliminationPass());
+
+  // Speculative execution if the target has divergent branches; otherwise nop.
+  FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
+
+  // Optimize based on known information about branches, and cleanup afterward.
+  FPM.addPass(JumpThreadingPass());
+  FPM.addPass(CorrelatedValuePropagationPass());
+
+  FPM.addPass(SimplifyCFGPass());
+  if (Level == OptimizationLevel::O3)
+    FPM.addPass(AggressiveInstCombinePass());
+  FPM.addPass(InstCombinePass());
+
+  if (!Level.isOptimizingForSize())
+    FPM.addPass(LibCallsShrinkWrapPass());
+
+  invokePeepholeEPCallbacks(FPM, Level);
+
+  // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
+  // using the size value profile. Don't perform this when optimizing for size.
+  if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
+      !Level.isOptimizingForSize())
+    FPM.addPass(PGOMemOPSizeOpt());
+
+  FPM.addPass(TailCallElimPass());
+  FPM.addPass(SimplifyCFGPass());
+
+  // Form canonically associated expression trees, and simplify the trees using
+  // basic mathematical properties. For example, this will form (nearly)
+  // minimal multiplication trees.
+  FPM.addPass(ReassociatePass());
+
+  // Add the primary loop simplification pipeline.
+  // FIXME: Currently this is split into two loop pass pipelines because we run
+  // some function passes in between them. These can and should be removed
+  // and/or replaced by scheduling the loop pass equivalents in the correct
+  // positions. But those equivalent passes aren't powerful enough yet.
+  // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
+  // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
+  // fully replace `SimplifyCFGPass`, and the closest to the other we have is
+  // `LoopInstSimplify`.
+  LoopPassManager LPM1, LPM2;
+
+  // Simplify the loop body. We do this initially to clean up after other loop
+  // passes run, either when iterating on a loop or on inner loops with
+  // implications on the outer loop.
+  LPM1.addPass(LoopInstSimplifyPass());
+  LPM1.addPass(LoopSimplifyCFGPass());
+
+  // Try to remove as much code from the loop header as possible,
+  // to reduce amount of IR that will have to be duplicated.
+  // TODO: Investigate promotion cap for O1.
+  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+
+  // Disable header duplication in loop rotation at -Oz.
+  LPM1.addPass(
+      LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
+  // TODO: Investigate promotion cap for O1.
+  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+  LPM1.addPass(
+      SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&
+                             EnableO3NonTrivialUnswitching));
+  LPM2.addPass(LoopIdiomRecognizePass());
+  LPM2.addPass(IndVarSimplifyPass());
+
+  for (auto &C : LateLoopOptimizationsEPCallbacks)
+    C(LPM2, Level);
+
+  LPM2.addPass(LoopDeletionPass());
+
+  if (EnableLoopInterchange)
+    LPM2.addPass(LoopInterchangePass());
+
+  // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
+  // because it changes IR to makes profile annotation in back compile
+  // inaccurate. The normal unroller doesn't pay attention to forced full unroll
+  // attributes so we need to make sure and allow the full unroll pass to pay
+  // attention to it.
+  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
+      PGOOpt->Action != PGOOptions::SampleUse)
+    LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
+                                    /* OnlyWhenForced= */ !PTO.LoopUnrolling,
+                                    PTO.ForgetAllSCEVInLoopUnroll));
+
+  for (auto &C : LoopOptimizerEndEPCallbacks)
+    C(LPM2, Level);
+
+  // We provide the opt remark emitter pass for LICM to use. We only need to do
+  // this once as it is immutable.
+  FPM.addPass(
+      RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
+                                              /*UseMemorySSA=*/true,
+                                              /*UseBlockFrequencyInfo=*/true));
+  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(InstCombinePass());
+  if (EnableLoopFlatten)
+    FPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
+  // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
+  // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
+  // *All* loop passes must preserve it, in order to be able to use it.
+  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
+                                              /*UseMemorySSA=*/false,
+                                              /*UseBlockFrequencyInfo=*/false));
+
+  // Delete small array after loop unroll.
+  FPM.addPass(SROA());
+
+  // Eliminate redundancies.
+  FPM.addPass(MergedLoadStoreMotionPass());
+  if (RunNewGVN)
+    FPM.addPass(NewGVNPass());
+  else
+    FPM.addPass(GVN());
+
+  // Sparse conditional constant propagation.
+  // FIXME: It isn't clear why we do this *after* loop passes rather than
+  // before...
+  FPM.addPass(SCCPPass());
+
+  // Delete dead bit computations (instcombine runs after to fold away the dead
+  // computations, and then ADCE will run later to exploit any new DCE
+  // opportunities that creates).
+  FPM.addPass(BDCEPass());
+
+  // Run instcombine after redundancy and dead bit elimination to exploit
+  // opportunities opened up by them.
+  FPM.addPass(InstCombinePass());
+  invokePeepholeEPCallbacks(FPM, Level);
+
+  // Re-consider control flow based optimizations after redundancy elimination,
+  // redo DCE, etc.
+  if (EnableDFAJumpThreading && Level.getSizeLevel() == 0)
+    FPM.addPass(DFAJumpThreadingPass());
+
+  FPM.addPass(JumpThreadingPass());
+  FPM.addPass(CorrelatedValuePropagationPass());
+
+  // Finally, do an expensive DCE pass to catch all the dead code exposed by
+  // the simplifications and basic cleanup after all the simplifications.
+  // TODO: Investigate if this is too expensive.
+  FPM.addPass(ADCEPass());
+
+  // Specially optimize memory movement as it doesn't look like dataflow in SSA.
+  FPM.addPass(MemCpyOptPass());
+
+  FPM.addPass(DSEPass());
+  FPM.addPass(createFunctionToLoopPassAdaptor(
+      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+      /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
+
+  FPM.addPass(CoroElidePass());
+
+  for (auto &C : ScalarOptimizerLateEPCallbacks)
+    C(FPM, Level);
+
+  FPM.addPass(SimplifyCFGPass(
+      SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true)));
+  FPM.addPass(InstCombinePass());
+  invokePeepholeEPCallbacks(FPM, Level);
+
+  if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt &&
+      (PGOOpt->Action == PGOOptions::IRUse ||
+       PGOOpt->Action == PGOOptions::SampleUse))
+    FPM.addPass(ControlHeightReductionPass());
+
+  return FPM;
+}
+
+void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
+  MPM.addPass(CanonicalizeAliasesPass());
+  MPM.addPass(NameAnonGlobalPass());
+}
+
+void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
+                                    OptimizationLevel Level, bool RunProfileGen,
+                                    bool IsCS, std::string ProfileFile,
+                                    std::string ProfileRemappingFile) {
+  assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
+  if (!IsCS && !DisablePreInliner) {
+    InlineParams IP;
+
+    IP.DefaultThreshold = PreInlineThreshold;
+
+    // FIXME: The hint threshold has the same value used by the regular inliner
+    // when not optimzing for size. This should probably be lowered after
+    // performance testing.
+    // FIXME: this comment is cargo culted from the old pass manager, revisit).
+    IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
+    ModuleInlinerWrapperPass MIWP(IP);
+    CGSCCPassManager &CGPipeline = MIWP.getPM();
+
+    FunctionPassManager FPM;
+    FPM.addPass(SROA());
+    FPM.addPass(EarlyCSEPass());    // Catch trivial redundancies.
+    FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks.
+    FPM.addPass(InstCombinePass()); // Combine silly sequences.
+    invokePeepholeEPCallbacks(FPM, Level);
+
+    CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
+
+    MPM.addPass(std::move(MIWP));
+
+    // Delete anything that is now dead to make sure that we don't instrument
+    // dead code. Instrumentation can end up keeping dead code around and
+    // dramatically increase code size.
+    MPM.addPass(GlobalDCEPass());
+  }
+
+  if (!RunProfileGen) {
+    assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
+    MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
+    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+    // RequireAnalysisPass for PSI before subsequent non-module passes.
+    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+    return;
+  }
+
+  // Perform PGO instrumentation.
+  MPM.addPass(PGOInstrumentationGen(IsCS));
+
+  FunctionPassManager FPM;
+  // Disable header duplication in loop rotation at -Oz.
+  FPM.addPass(createFunctionToLoopPassAdaptor(
+      LoopRotatePass(Level != OptimizationLevel::Oz), /*UseMemorySSA=*/false,
+      /*UseBlockFrequencyInfo=*/false));
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+
+  // Add the profile lowering pass.
+  InstrProfOptions Options;
+  if (!ProfileFile.empty())
+    Options.InstrProfileOutput = ProfileFile;
+  // Do counter promotion at Level greater than O0.
+  Options.DoCounterPromotion = true;
+  Options.UseBFIInPromotion = IsCS;
+  MPM.addPass(InstrProfiling(Options, IsCS));
+}
+
+void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM,
+                                         bool RunProfileGen, bool IsCS,
+                                         std::string ProfileFile,
+                                         std::string ProfileRemappingFile) {
+  if (!RunProfileGen) {
+    assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
+    MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
+    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+    // RequireAnalysisPass for PSI before subsequent non-module passes.
+    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+    return;
+  }
+
+  // Perform PGO instrumentation.
+  MPM.addPass(PGOInstrumentationGen(IsCS));
+  // Add the profile lowering pass.
+  InstrProfOptions Options;
+  if (!ProfileFile.empty())
+    Options.InstrProfileOutput = ProfileFile;
+  // Do not do counter promotion at O0.
+  Options.DoCounterPromotion = false;
+  Options.UseBFIInPromotion = IsCS;
+  MPM.addPass(InstrProfiling(Options, IsCS));
+}
+
+static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) {
+  return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
+}
+
+ModuleInlinerWrapperPass
+PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
+                                  ThinOrFullLTOPhase Phase) {
+  InlineParams IP = getInlineParamsFromOptLevel(Level);
+  if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
+      PGOOpt->Action == PGOOptions::SampleUse)
+    IP.HotCallSiteThreshold = 0;
+
+  if (PGOOpt)
+    IP.EnableDeferral = EnablePGOInlineDeferral;
+
+  ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
+                                UseInlineAdvisor, MaxDevirtIterations);
+
+  // Require the GlobalsAA analysis for the module so we can query it within
+  // the CGSCC pipeline.
+  MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
+  // Invalidate AAManager so it can be recreated and pick up the newly available
+  // GlobalsAA.
+  MIWP.addModulePass(
+      createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
+
+  // Require the ProfileSummaryAnalysis for the module so we can query it within
+  // the inliner pass.
+  MIWP.addModulePass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+
+  // Now begin the main postorder CGSCC pipeline.
+  // FIXME: The current CGSCC pipeline has its origins in the legacy pass
+  // manager and trying to emulate its precise behavior. Much of this doesn't
+  // make a lot of sense and we should revisit the core CGSCC structure.
+  CGSCCPassManager &MainCGPipeline = MIWP.getPM();
+
+  // Note: historically, the PruneEH pass was run first to deduce nounwind and
+  // generally clean up exception handling overhead. It isn't clear this is
+  // valuable as the inliner doesn't currently care whether it is inlining an
+  // invoke or a call.
+
+  if (AttributorRun & AttributorRunOption::CGSCC)
+    MainCGPipeline.addPass(AttributorCGSCCPass());
+
+  // Now deduce any function attributes based in the current code.
+  MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
+
+  // When at O3 add argument promotion to the pass pipeline.
+  // FIXME: It isn't at all clear why this should be limited to O3.
+  if (Level == OptimizationLevel::O3)
+    MainCGPipeline.addPass(ArgumentPromotionPass());
+
+  // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
+  // there are no OpenMP runtime calls present in the module.
+  if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
+    MainCGPipeline.addPass(OpenMPOptCGSCCPass());
+
+  for (auto &C : CGSCCOptimizerLateEPCallbacks)
+    C(MainCGPipeline, Level);
+
+  // Lastly, add the core function simplification pipeline nested inside the
+  // CGSCC walk.
+  MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
+      buildFunctionSimplificationPipeline(Level, Phase)));
+
+  MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
+
+  return MIWP;
+}
+
+ModulePassManager
+PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
+                                               ThinOrFullLTOPhase Phase) {
+  ModulePassManager MPM;
+
+  // Place pseudo probe instrumentation as the first pass of the pipeline to
+  // minimize the impact of optimization changes.
+  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
+      Phase != ThinOrFullLTOPhase::ThinLTOPostLink)
+    MPM.addPass(SampleProfileProbePass(TM));
+
+  bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
+
+  // In ThinLTO mode, when flattened profile is used, all the available
+  // profile information will be annotated in PreLink phase so there is
+  // no need to load the profile again in PostLink.
+  bool LoadSampleProfile =
+      HasSampleProfile &&
+      !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink);
+
+  // During the ThinLTO backend phase we perform early indirect call promotion
+  // here, before globalopt. Otherwise imported available_externally functions
+  // look unreferenced and are removed. If we are going to load the sample
+  // profile then defer until later.
+  // TODO: See if we can move later and consolidate with the location where
+  // we perform ICP when we are loading a sample profile.
+  // TODO: We pass HasSampleProfile (whether there was a sample profile file
+  // passed to the compile) to the SamplePGO flag of ICP. This is used to
+  // determine whether the new direct calls are annotated with prof metadata.
+  // Ideally this should be determined from whether the IR is annotated with
+  // sample profile, and not whether the a sample profile was provided on the
+  // command line. E.g. for flattened profiles where we will not be reloading
+  // the sample profile in the ThinLTO backend, we ideally shouldn't have to
+  // provide the sample profile file.
+  if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
+    MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
+
+  // Do basic inference of function attributes from known properties of system
+  // libraries and other oracles.
+  MPM.addPass(InferFunctionAttrsPass());
+
+  // Create an early function pass manager to cleanup the output of the
+  // frontend.
+  FunctionPassManager EarlyFPM;
+  // Lower llvm.expect to metadata before attempting transforms.
+  // Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
+  EarlyFPM.addPass(LowerExpectIntrinsicPass());
+  EarlyFPM.addPass(SimplifyCFGPass());
+  EarlyFPM.addPass(SROA());
+  EarlyFPM.addPass(EarlyCSEPass());
+  EarlyFPM.addPass(CoroEarlyPass());
+  if (Level == OptimizationLevel::O3)
+    EarlyFPM.addPass(CallSiteSplittingPass());
+
+  // In SamplePGO ThinLTO backend, we need instcombine before profile annotation
+  // to convert bitcast to direct calls so that they can be inlined during the
+  // profile annotation prepration step.
+  // More details about SamplePGO design can be found in:
+  // https://research.google.com/pubs/pub45290.html
+  // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
+  if (LoadSampleProfile)
+    EarlyFPM.addPass(InstCombinePass());
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
+
+  if (LoadSampleProfile) {
+    // Annotate sample profile right after early FPM to ensure freshness of
+    // the debug info.
+    MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
+                                        PGOOpt->ProfileRemappingFile, Phase));
+    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+    // RequireAnalysisPass for PSI before subsequent non-module passes.
+    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+    // Do not invoke ICP in the LTOPrelink phase as it makes it hard
+    // for the profile annotation to be accurate in the LTO backend.
+    if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink &&
+        Phase != ThinOrFullLTOPhase::FullLTOPreLink)
+      // We perform early indirect call promotion here, before globalopt.
+      // This is important for the ThinLTO backend phase because otherwise
+      // imported available_externally functions look unreferenced and are
+      // removed.
+      MPM.addPass(
+          PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
+  }
+
+  // Try to perform OpenMP specific optimizations on the module. This is a
+  // (quick!) no-op if there are no OpenMP runtime calls present in the module.
+  if (Level != OptimizationLevel::O0)
+    MPM.addPass(OpenMPOptPass());
+
+  if (AttributorRun & AttributorRunOption::MODULE)
+    MPM.addPass(AttributorPass());
+
+  // Lower type metadata and the type.test intrinsic in the ThinLTO
+  // post link pipeline after ICP. This is to enable usage of the type
+  // tests in ICP sequences.
+  if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
+    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
+
+  for (auto &C : PipelineEarlySimplificationEPCallbacks)
+    C(MPM, Level);
+
+  // Specialize functions with IPSCCP.
+  if (EnableFunctionSpecialization)
+    MPM.addPass(FunctionSpecializationPass());
+
+  // Interprocedural constant propagation now that basic cleanup has occurred
+  // and prior to optimizing globals.
+  // FIXME: This position in the pipeline hasn't been carefully considered in
+  // years, it should be re-analyzed.
+  MPM.addPass(IPSCCPPass());
+
+  // Attach metadata to indirect call sites indicating the set of functions
+  // they may target at run-time. This should follow IPSCCP.
+  MPM.addPass(CalledValuePropagationPass());
+
+  // Optimize globals to try and fold them into constants.
+  MPM.addPass(GlobalOptPass());
+
+  // Promote any localized globals to SSA registers.
+  // FIXME: Should this instead by a run of SROA?
+  // FIXME: We should probably run instcombine and simplifycfg afterward to
+  // delete control flows that are dead once globals have been folded to
+  // constants.
+  MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
+
+  // Remove any dead arguments exposed by cleanups and constant folding
+  // globals.
+  MPM.addPass(DeadArgumentEliminationPass());
+
+  // Create a small function pass pipeline to cleanup after all the global
+  // optimizations.
+  FunctionPassManager GlobalCleanupPM;
+  GlobalCleanupPM.addPass(InstCombinePass());
+  invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
+
+  GlobalCleanupPM.addPass(SimplifyCFGPass());
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM)));
+
+  // Add all the requested passes for instrumentation PGO, if requested.
+  if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
+      (PGOOpt->Action == PGOOptions::IRInstr ||
+       PGOOpt->Action == PGOOptions::IRUse)) {
+    addPGOInstrPasses(MPM, Level,
+                      /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
+                      /* IsCS */ false, PGOOpt->ProfileFile,
+                      PGOOpt->ProfileRemappingFile);
+    MPM.addPass(PGOIndirectCallPromotion(false, false));
+  }
+  if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
+      PGOOpt->CSAction == PGOOptions::CSIRInstr)
+    MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
+
+  // Synthesize function entry counts for non-PGO compilation.
+  if (EnableSyntheticCounts && !PGOOpt)
+    MPM.addPass(SyntheticCountsPropagation());
+
+  MPM.addPass(buildInlinerPipeline(Level, Phase));
+
+  if (EnableMemProfiler && Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
+    MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
+    MPM.addPass(ModuleMemProfilerPass());
+  }
+
+  return MPM;
+}
+
+/// TODO: Should LTO cause any 
diff erences to this set of passes?
+void PassBuilder::addVectorPasses(OptimizationLevel Level,
+                                  FunctionPassManager &FPM, bool IsFullLTO) {
+  FPM.addPass(LoopVectorizePass(
+      LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
+
+  if (IsFullLTO) {
+    // The vectorizer may have significantly shortened a loop body; unroll
+    // again. Unroll small loops to hide loop backedge latency and saturate any
+    // parallel execution resources of an out-of-order processor. We also then
+    // need to clean up redundancies and loop invariant code.
+    // FIXME: It would be really good to use a loop-integrated instruction
+    // combiner for cleanup here so that the unrolling and LICM can be pipelined
+    // across the loop nests.
+    // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
+    if (EnableUnrollAndJam && PTO.LoopUnrolling)
+      FPM.addPass(createFunctionToLoopPassAdaptor(
+          LoopUnrollAndJamPass(Level.getSpeedupLevel())));
+    FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
+        Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
+        PTO.ForgetAllSCEVInLoopUnroll)));
+    FPM.addPass(WarnMissedTransformationsPass());
+  }
+
+  if (!IsFullLTO) {
+    // Eliminate loads by forwarding stores from the previous iteration to loads
+    // of the current iteration.
+    FPM.addPass(LoopLoadEliminationPass());
+  }
+  // Cleanup after the loop optimization passes.
+  FPM.addPass(InstCombinePass());
+
+  if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
+    // At higher optimization levels, try to clean up any runtime overlap and
+    // alignment checks inserted by the vectorizer. We want to track correlated
+    // runtime checks for two inner loops in the same outer loop, fold any
+    // common computations, hoist loop-invariant aspects out of any outer loop,
+    // and unswitch the runtime checks if possible. Once hoisted, we may have
+    // dead (or speculatable) control flows or more combining opportunities.
+    FPM.addPass(EarlyCSEPass());
+    FPM.addPass(CorrelatedValuePropagationPass());
+    FPM.addPass(InstCombinePass());
+    LoopPassManager LPM;
+    LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+    LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
+                                       OptimizationLevel::O3));
+    FPM.addPass(
+        RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+    FPM.addPass(
+        createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
+                                        /*UseBlockFrequencyInfo=*/true));
+    FPM.addPass(SimplifyCFGPass());
+    FPM.addPass(InstCombinePass());
+  }
+
+  // Now that we've formed fast to execute loop structures, we do further
+  // optimizations. These are run afterward as they might block doing complex
+  // analyses and transforms such as what are needed for loop vectorization.
+
+  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
+  // GVN, loop transforms, and others have already run, so it's now better to
+  // convert to more optimized IR using more aggressive simplify CFG options.
+  // The extra sinking transform can create larger basic blocks, so do this
+  // before SLP vectorization.
+  FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
+                                  .forwardSwitchCondToPhi(true)
+                                  .convertSwitchToLookupTable(true)
+                                  .needCanonicalLoops(false)
+                                  .hoistCommonInsts(true)
+                                  .sinkCommonInsts(true)));
+
+  if (IsFullLTO) {
+    FPM.addPass(SCCPPass());
+    FPM.addPass(InstCombinePass());
+    FPM.addPass(BDCEPass());
+  }
+
+  // Optimize parallel scalar instruction chains into SIMD instructions.
+  if (PTO.SLPVectorization) {
+    FPM.addPass(SLPVectorizerPass());
+    if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
+      FPM.addPass(EarlyCSEPass());
+    }
+  }
+  // Enhance/cleanup vector code.
+  FPM.addPass(VectorCombinePass());
+
+  if (!IsFullLTO) {
+    FPM.addPass(InstCombinePass());
+    // Unroll small loops to hide loop backedge latency and saturate any
+    // parallel execution resources of an out-of-order processor. We also then
+    // need to clean up redundancies and loop invariant code.
+    // FIXME: It would be really good to use a loop-integrated instruction
+    // combiner for cleanup here so that the unrolling and LICM can be pipelined
+    // across the loop nests.
+    // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
+    if (EnableUnrollAndJam && PTO.LoopUnrolling) {
+      FPM.addPass(createFunctionToLoopPassAdaptor(
+          LoopUnrollAndJamPass(Level.getSpeedupLevel())));
+    }
+    FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
+        Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
+        PTO.ForgetAllSCEVInLoopUnroll)));
+    FPM.addPass(WarnMissedTransformationsPass());
+    FPM.addPass(InstCombinePass());
+    FPM.addPass(
+        RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+    FPM.addPass(createFunctionToLoopPassAdaptor(
+        LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+        /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
+  }
+
+  // Now that we've vectorized and unrolled loops, we may have more refined
+  // alignment information, try to re-derive it here.
+  FPM.addPass(AlignmentFromAssumptionsPass());
+
+  if (IsFullLTO)
+    FPM.addPass(InstCombinePass());
+}
+
+ModulePassManager
+PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
+                                             bool LTOPreLink) {
+  ModulePassManager MPM;
+
+  // Optimize globals now that the module is fully simplified.
+  MPM.addPass(GlobalOptPass());
+  MPM.addPass(GlobalDCEPass());
+
+  // Run partial inlining pass to partially inline functions that have
+  // large bodies.
+  if (RunPartialInlining)
+    MPM.addPass(PartialInlinerPass());
+
+  // Remove avail extern fns and globals definitions since we aren't compiling
+  // an object file for later LTO. For LTO we want to preserve these so they
+  // are eligible for inlining at link-time. Note if they are unreferenced they
+  // will be removed by GlobalDCE later, so this only impacts referenced
+  // available externally globals. Eventually they will be suppressed during
+  // codegen, but eliminating here enables more opportunity for GlobalDCE as it
+  // may make globals referenced by available external functions dead and saves
+  // running remaining passes on the eliminated functions. These should be
+  // preserved during prelinking for link-time inlining decisions.
+  if (!LTOPreLink)
+    MPM.addPass(EliminateAvailableExternallyPass());
+
+  if (EnableOrderFileInstrumentation)
+    MPM.addPass(InstrOrderFilePass());
+
+  // Do RPO function attribute inference across the module to forward-propagate
+  // attributes where applicable.
+  // FIXME: Is this really an optimization rather than a canonicalization?
+  MPM.addPass(ReversePostOrderFunctionAttrsPass());
+
+  // Do a post inline PGO instrumentation and use pass. This is a context
+  // sensitive PGO pass. We don't want to do this in LTOPreLink phrase as
+  // cross-module inline has not been done yet. The context sensitive
+  // instrumentation is after all the inlines are done.
+  if (!LTOPreLink && PGOOpt) {
+    if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
+      addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
+                        /* IsCS */ true, PGOOpt->CSProfileGenFile,
+                        PGOOpt->ProfileRemappingFile);
+    else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
+      addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
+                        /* IsCS */ true, PGOOpt->ProfileFile,
+                        PGOOpt->ProfileRemappingFile);
+  }
+
+  // Re-require GloblasAA here prior to function passes. This is particularly
+  // useful as the above will have inlined, DCE'ed, and function-attr
+  // propagated everything. We should at this point have a reasonably minimal
+  // and richly annotated call graph. By computing aliasing and mod/ref
+  // information for all local globals here, the late loop passes and notably
+  // the vectorizer will be able to use them to help recognize vectorizable
+  // memory operations.
+  MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
+
+  FunctionPassManager OptimizePM;
+  OptimizePM.addPass(Float2IntPass());
+  OptimizePM.addPass(LowerConstantIntrinsicsPass());
+
+  if (EnableMatrix) {
+    OptimizePM.addPass(LowerMatrixIntrinsicsPass());
+    OptimizePM.addPass(EarlyCSEPass());
+  }
+
+  // FIXME: We need to run some loop optimizations to re-rotate loops after
+  // simplifycfg and others undo their rotation.
+
+  // Optimize the loop execution. These passes operate on entire loop nests
+  // rather than on each loop in an inside-out manner, and so they are actually
+  // function passes.
+
+  for (auto &C : VectorizerStartEPCallbacks)
+    C(OptimizePM, Level);
+
+  // First rotate loops that may have been un-rotated by prior passes.
+  // Disable header duplication at -Oz.
+  OptimizePM.addPass(createFunctionToLoopPassAdaptor(
+      LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink),
+      /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
+
+  // Distribute loops to allow partial vectorization.  I.e. isolate dependences
+  // into separate loop that would otherwise inhibit vectorization.  This is
+  // currently only performed for loops marked with the metadata
+  // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
+  OptimizePM.addPass(LoopDistributePass());
+
+  // Populates the VFABI attribute with the scalar-to-vector mappings
+  // from the TargetLibraryInfo.
+  OptimizePM.addPass(InjectTLIMappings());
+
+  addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
+
+  // Split out cold code. Splitting is done late to avoid hiding context from
+  // other optimizations and inadvertently regressing performance. The tradeoff
+  // is that this has a higher code size cost than splitting early.
+  if (EnableHotColdSplit && !LTOPreLink)
+    MPM.addPass(HotColdSplittingPass());
+
+  // Search the code for similar regions of code. If enough similar regions can
+  // be found where extracting the regions into their own function will decrease
+  // the size of the program, we extract the regions, a deduplicate the
+  // structurally similar regions.
+  if (EnableIROutliner)
+    MPM.addPass(IROutlinerPass());
+
+  // Merge functions if requested.
+  if (PTO.MergeFunctions)
+    MPM.addPass(MergeFunctionsPass());
+
+  // LoopSink pass sinks instructions hoisted by LICM, which serves as a
+  // canonicalization pass that enables other optimizations. As a result,
+  // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
+  // result too early.
+  OptimizePM.addPass(LoopSinkPass());
+
+  // And finally clean up LCSSA form before generating code.
+  OptimizePM.addPass(InstSimplifyPass());
+
+  // This hoists/decomposes div/rem ops. It should run after other sink/hoist
+  // passes to avoid re-sinking, but before SimplifyCFG because it can allow
+  // flattening of blocks.
+  OptimizePM.addPass(DivRemPairsPass());
+
+  // LoopSink (and other loop passes since the last simplifyCFG) might have
+  // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
+  OptimizePM.addPass(SimplifyCFGPass());
+
+  OptimizePM.addPass(CoroCleanupPass());
+
+  // Add the core optimizing pipeline.
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM)));
+
+  for (auto &C : OptimizerLastEPCallbacks)
+    C(MPM, Level);
+
+  if (PTO.CallGraphProfile)
+    MPM.addPass(CGProfilePass());
+
+  // Now we need to do some global optimization transforms.
+  // FIXME: It would seem like these should come first in the optimization
+  // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
+  // ordering here.
+  MPM.addPass(GlobalDCEPass());
+  MPM.addPass(ConstantMergePass());
+
+  // TODO: Relative look table converter pass caused an issue when full lto is
+  // enabled. See https://reviews.llvm.org/D94355 for more details.
+  // Until the issue fixed, disable this pass during pre-linking phase.
+  if (!LTOPreLink)
+    MPM.addPass(RelLookupTableConverterPass());
+
+  return MPM;
+}
+
+ModulePassManager
+PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
+                                           bool LTOPreLink) {
+  assert(Level != OptimizationLevel::O0 &&
+         "Must request optimizations for the default pipeline!");
+
+  ModulePassManager MPM;
+
+  // Convert @llvm.global.annotations to !annotation metadata.
+  MPM.addPass(Annotation2MetadataPass());
+
+  // Force any function attributes we want the rest of the pipeline to observe.
+  MPM.addPass(ForceFunctionAttrsPass());
+
+  // Apply module pipeline start EP callback.
+  for (auto &C : PipelineStartEPCallbacks)
+    C(MPM, Level);
+
+  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
+    MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
+
+  // Add the core simplification pipeline.
+  MPM.addPass(buildModuleSimplificationPipeline(
+      Level, LTOPreLink ? ThinOrFullLTOPhase::FullLTOPreLink
+                        : ThinOrFullLTOPhase::None));
+
+  // Now add the optimization pipeline.
+  MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink));
+
+  if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
+    MPM.addPass(PseudoProbeUpdatePass());
+
+  // Emit annotation remarks.
+  addAnnotationRemarksPass(MPM);
+
+  if (LTOPreLink)
+    addRequiredLTOPreLinkPasses(MPM);
+
+  return MPM;
+}
+
+ModulePassManager
+PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
+  assert(Level != OptimizationLevel::O0 &&
+         "Must request optimizations for the default pipeline!");
+
+  ModulePassManager MPM;
+
+  // Convert @llvm.global.annotations to !annotation metadata.
+  MPM.addPass(Annotation2MetadataPass());
+
+  // Force any function attributes we want the rest of the pipeline to observe.
+  MPM.addPass(ForceFunctionAttrsPass());
+
+  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
+    MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
+
+  // Apply module pipeline start EP callback.
+  for (auto &C : PipelineStartEPCallbacks)
+    C(MPM, Level);
+
+  // If we are planning to perform ThinLTO later, we don't bloat the code with
+  // unrolling/vectorization/... now. Just simplify the module as much as we
+  // can.
+  MPM.addPass(buildModuleSimplificationPipeline(
+      Level, ThinOrFullLTOPhase::ThinLTOPreLink));
+
+  // Run partial inlining pass to partially inline functions that have
+  // large bodies.
+  // FIXME: It isn't clear whether this is really the right place to run this
+  // in ThinLTO. Because there is another canonicalization and simplification
+  // phase that will run after the thin link, running this here ends up with
+  // less information than will be available later and it may grow functions in
+  // ways that aren't beneficial.
+  if (RunPartialInlining)
+    MPM.addPass(PartialInlinerPass());
+
+  // Reduce the size of the IR as much as possible.
+  MPM.addPass(GlobalOptPass());
+
+  // Module simplification splits coroutines, but does not fully clean up
+  // coroutine intrinsics. To ensure ThinLTO optimization passes don't trip up
+  // on these, we schedule the cleanup here.
+  MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
+
+  if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
+    MPM.addPass(PseudoProbeUpdatePass());
+
+  // Handle OptimizerLastEPCallbacks added by clang on PreLink. Actual
+  // optimization is going to be done in PostLink stage, but clang can't
+  // add callbacks there in case of in-process ThinLTO called by linker.
+  for (auto &C : OptimizerLastEPCallbacks)
+    C(MPM, Level);
+
+  // Emit annotation remarks.
+  addAnnotationRemarksPass(MPM);
+
+  addRequiredLTOPreLinkPasses(MPM);
+
+  return MPM;
+}
+
+ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
+    OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
+  ModulePassManager MPM;
+
+  // Convert @llvm.global.annotations to !annotation metadata.
+  MPM.addPass(Annotation2MetadataPass());
+
+  if (ImportSummary) {
+    // These passes import type identifier resolutions for whole-program
+    // devirtualization and CFI. They must run early because other passes may
+    // disturb the specific instruction patterns that these passes look for,
+    // creating dependencies on resolutions that may not appear in the summary.
+    //
+    // For example, GVN may transform the pattern assume(type.test) appearing in
+    // two basic blocks into assume(phi(type.test, type.test)), which would
+    // transform a dependency on a WPD resolution into a dependency on a type
+    // identifier resolution for CFI.
+    //
+    // Also, WPD has access to more precise information than ICP and can
+    // devirtualize more effectively, so it should operate on the IR first.
+    //
+    // The WPD and LowerTypeTest passes need to run at -O0 to lower type
+    // metadata and intrinsics.
+    MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
+    MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
+  }
+
+  if (Level == OptimizationLevel::O0) {
+    // Run a second time to clean up any type tests left behind by WPD for use
+    // in ICP.
+    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
+    // Drop available_externally and unreferenced globals. This is necessary
+    // with ThinLTO in order to avoid leaving undefined references to dead
+    // globals in the object file.
+    MPM.addPass(EliminateAvailableExternallyPass());
+    MPM.addPass(GlobalDCEPass());
+    return MPM;
+  }
+
+  // Force any function attributes we want the rest of the pipeline to observe.
+  MPM.addPass(ForceFunctionAttrsPass());
+
+  // Add the core simplification pipeline.
+  MPM.addPass(buildModuleSimplificationPipeline(
+      Level, ThinOrFullLTOPhase::ThinLTOPostLink));
+
+  // Now add the optimization pipeline.
+  MPM.addPass(buildModuleOptimizationPipeline(Level));
+
+  // Emit annotation remarks.
+  addAnnotationRemarksPass(MPM);
+
+  return MPM;
+}
+
+ModulePassManager
+PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
+  assert(Level != OptimizationLevel::O0 &&
+         "Must request optimizations for the default pipeline!");
+  // FIXME: We should use a customized pre-link pipeline!
+  return buildPerModuleDefaultPipeline(Level,
+                                       /* LTOPreLink */ true);
+}
+
+ModulePassManager
+PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
+                                     ModuleSummaryIndex *ExportSummary) {
+  ModulePassManager MPM;
+
+  // Convert @llvm.global.annotations to !annotation metadata.
+  MPM.addPass(Annotation2MetadataPass());
+
+  // Create a function that performs CFI checks for cross-DSO calls with targets
+  // in the current module.
+  MPM.addPass(CrossDSOCFIPass());
+
+  if (Level == OptimizationLevel::O0) {
+    // The WPD and LowerTypeTest passes need to run at -O0 to lower type
+    // metadata and intrinsics.
+    MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
+    MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+    // Run a second time to clean up any type tests left behind by WPD for use
+    // in ICP.
+    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
+
+    // Emit annotation remarks.
+    addAnnotationRemarksPass(MPM);
+
+    return MPM;
+  }
+
+  if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
+    // Load sample profile before running the LTO optimization pipeline.
+    MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
+                                        PGOOpt->ProfileRemappingFile,
+                                        ThinOrFullLTOPhase::FullLTOPostLink));
+    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+    // RequireAnalysisPass for PSI before subsequent non-module passes.
+    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+  }
+
+  // Remove unused virtual tables to improve the quality of code generated by
+  // whole-program devirtualization and bitset lowering.
+  MPM.addPass(GlobalDCEPass());
+
+  // Force any function attributes we want the rest of the pipeline to observe.
+  MPM.addPass(ForceFunctionAttrsPass());
+
+  // Do basic inference of function attributes from known properties of system
+  // libraries and other oracles.
+  MPM.addPass(InferFunctionAttrsPass());
+
+  if (Level.getSpeedupLevel() > 1) {
+    FunctionPassManager EarlyFPM;
+    EarlyFPM.addPass(CallSiteSplittingPass());
+    MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
+
+    // Indirect call promotion. This should promote all the targets that are
+    // left by the earlier promotion pass that promotes intra-module targets.
+    // This two-step promotion is to save the compile time. For LTO, it should
+    // produce the same result as if we only do promotion here.
+    MPM.addPass(PGOIndirectCallPromotion(
+        true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
+
+    if (EnableFunctionSpecialization)
+      MPM.addPass(FunctionSpecializationPass());
+    // Propagate constants at call sites into the functions they call.  This
+    // opens opportunities for globalopt (and inlining) by substituting function
+    // pointers passed as arguments to direct uses of functions.
+    MPM.addPass(IPSCCPPass());
+
+    // Attach metadata to indirect call sites indicating the set of functions
+    // they may target at run-time. This should follow IPSCCP.
+    MPM.addPass(CalledValuePropagationPass());
+  }
+
+  // Now deduce any function attributes based in the current code.
+  MPM.addPass(
+      createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
+
+  // Do RPO function attribute inference across the module to forward-propagate
+  // attributes where applicable.
+  // FIXME: Is this really an optimization rather than a canonicalization?
+  MPM.addPass(ReversePostOrderFunctionAttrsPass());
+
+  // Use in-range annotations on GEP indices to split globals where beneficial.
+  MPM.addPass(GlobalSplitPass());
+
+  // Run whole program optimization of virtual call when the list of callees
+  // is fixed.
+  MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
+
+  // Stop here at -O1.
+  if (Level == OptimizationLevel::O1) {
+    // The LowerTypeTestsPass needs to run to lower type metadata and the
+    // type.test intrinsics. The pass does nothing if CFI is disabled.
+    MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+    // Run a second time to clean up any type tests left behind by WPD for use
+    // in ICP (which is performed earlier than this in the regular LTO
+    // pipeline).
+    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
+
+    // Emit annotation remarks.
+    addAnnotationRemarksPass(MPM);
+
+    return MPM;
+  }
+
+  // Optimize globals to try and fold them into constants.
+  MPM.addPass(GlobalOptPass());
+
+  // Promote any localized globals to SSA registers.
+  MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
+
+  // Linking modules together can lead to duplicate global constant, only
+  // keep one copy of each constant.
+  MPM.addPass(ConstantMergePass());
+
+  // Remove unused arguments from functions.
+  MPM.addPass(DeadArgumentEliminationPass());
+
+  // Reduce the code after globalopt and ipsccp.  Both can open up significant
+  // simplification opportunities, and both can propagate functions through
+  // function pointers.  When this happens, we often have to resolve varargs
+  // calls, etc, so let instcombine do this.
+  FunctionPassManager PeepholeFPM;
+  if (Level == OptimizationLevel::O3)
+    PeepholeFPM.addPass(AggressiveInstCombinePass());
+  PeepholeFPM.addPass(InstCombinePass());
+  invokePeepholeEPCallbacks(PeepholeFPM, Level);
+
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM)));
+
+  // Note: historically, the PruneEH pass was run first to deduce nounwind and
+  // generally clean up exception handling overhead. It isn't clear this is
+  // valuable as the inliner doesn't currently care whether it is inlining an
+  // invoke or a call.
+  // Run the inliner now.
+  MPM.addPass(ModuleInlinerWrapperPass(getInlineParamsFromOptLevel(Level)));
+
+  // Optimize globals again after we ran the inliner.
+  MPM.addPass(GlobalOptPass());
+
+  // Garbage collect dead functions.
+  MPM.addPass(GlobalDCEPass());
+
+  // If we didn't decide to inline a function, check to see if we can
+  // transform it to pass arguments by value instead of by reference.
+  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass()));
+
+  FunctionPassManager FPM;
+  // The IPO Passes may leave cruft around. Clean up after them.
+  FPM.addPass(InstCombinePass());
+  invokePeepholeEPCallbacks(FPM, Level);
+
+  FPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
+
+  // Do a post inline PGO instrumentation and use pass. This is a context
+  // sensitive PGO pass.
+  if (PGOOpt) {
+    if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
+      addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
+                        /* IsCS */ true, PGOOpt->CSProfileGenFile,
+                        PGOOpt->ProfileRemappingFile);
+    else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
+      addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
+                        /* IsCS */ true, PGOOpt->ProfileFile,
+                        PGOOpt->ProfileRemappingFile);
+  }
+
+  // Break up allocas
+  FPM.addPass(SROA());
+
+  // LTO provides additional opportunities for tailcall elimination due to
+  // link-time inlining, and visibility of nocapture attribute.
+  FPM.addPass(TailCallElimPass());
+
+  // Run a few AA driver optimizations here and now to cleanup the code.
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+
+  MPM.addPass(
+      createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
+
+  // Require the GlobalsAA analysis for the module so we can query it within
+  // MainFPM.
+  MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
+  // Invalidate AAManager so it can be recreated and pick up the newly available
+  // GlobalsAA.
+  MPM.addPass(
+      createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
+
+  FunctionPassManager MainFPM;
+  MainFPM.addPass(createFunctionToLoopPassAdaptor(
+      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+      /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
+
+  if (RunNewGVN)
+    MainFPM.addPass(NewGVNPass());
+  else
+    MainFPM.addPass(GVN());
+
+  // Remove dead memcpy()'s.
+  MainFPM.addPass(MemCpyOptPass());
+
+  // Nuke dead stores.
+  MainFPM.addPass(DSEPass());
+  MainFPM.addPass(MergedLoadStoreMotionPass());
+
+  // More loops are countable; try to optimize them.
+  if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
+    MainFPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
+
+  if (EnableConstraintElimination)
+    MainFPM.addPass(ConstraintEliminationPass());
+
+  LoopPassManager LPM;
+  LPM.addPass(IndVarSimplifyPass());
+  LPM.addPass(LoopDeletionPass());
+  // FIXME: Add loop interchange.
+
+  // Unroll small loops and perform peeling.
+  LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
+                                 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
+                                 PTO.ForgetAllSCEVInLoopUnroll));
+  // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
+  // *All* loop passes must preserve it, in order to be able to use it.
+  MainFPM.addPass(createFunctionToLoopPassAdaptor(
+      std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
+
+  MainFPM.addPass(LoopDistributePass());
+
+  addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
+
+  invokePeepholeEPCallbacks(MainFPM, Level);
+  MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM)));
+
+  // Lower type metadata and the type.test intrinsic. This pass supports
+  // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
+  // to be run at link time if CFI is enabled. This pass does nothing if
+  // CFI is disabled.
+  MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+  // Run a second time to clean up any type tests left behind by WPD for use
+  // in ICP (which is performed earlier than this in the regular LTO pipeline).
+  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
+
+  // Enable splitting late in the FullLTO post-link pipeline. This is done in
+  // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses).
+  if (EnableHotColdSplit)
+    MPM.addPass(HotColdSplittingPass());
+
+  // Add late LTO optimization passes.
+  // Delete basic blocks, which optimization passes may have killed.
+  MPM.addPass(createModuleToFunctionPassAdaptor(
+      SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true))));
+
+  // Drop bodies of available eternally objects to improve GlobalDCE.
+  MPM.addPass(EliminateAvailableExternallyPass());
+
+  // Now that we have optimized the program, discard unreachable functions.
+  MPM.addPass(GlobalDCEPass());
+
+  if (PTO.MergeFunctions)
+    MPM.addPass(MergeFunctionsPass());
+
+  // Emit annotation remarks.
+  addAnnotationRemarksPass(MPM);
+
+  return MPM;
+}
+
+ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
+                                                      bool LTOPreLink) {
+  assert(Level == OptimizationLevel::O0 &&
+         "buildO0DefaultPipeline should only be used with O0");
+
+  ModulePassManager MPM;
+
+  // Perform pseudo probe instrumentation in O0 mode. This is for the
+  // consistency between 
diff erent build modes. For example, a LTO build can be
+  // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
+  // the postlink will require pseudo probe instrumentation in the prelink.
+  if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
+    MPM.addPass(SampleProfileProbePass(TM));
+
+  if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
+                 PGOOpt->Action == PGOOptions::IRUse))
+    addPGOInstrPassesForO0(
+        MPM,
+        /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
+        /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
+
+  for (auto &C : PipelineStartEPCallbacks)
+    C(MPM, Level);
+
+  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
+    MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
+
+  for (auto &C : PipelineEarlySimplificationEPCallbacks)
+    C(MPM, Level);
+
+  // Build a minimal pipeline based on the semantics required by LLVM,
+  // which is just that always inlining occurs. Further, disable generating
+  // lifetime intrinsics to avoid enabling further optimizations during
+  // code generation.
+  MPM.addPass(AlwaysInlinerPass(
+      /*InsertLifetimeIntrinsics=*/false));
+
+  if (PTO.MergeFunctions)
+    MPM.addPass(MergeFunctionsPass());
+
+  if (EnableMatrix)
+    MPM.addPass(
+        createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));
+
+  if (!CGSCCOptimizerLateEPCallbacks.empty()) {
+    CGSCCPassManager CGPM;
+    for (auto &C : CGSCCOptimizerLateEPCallbacks)
+      C(CGPM, Level);
+    if (!CGPM.isEmpty())
+      MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
+  }
+  if (!LateLoopOptimizationsEPCallbacks.empty()) {
+    LoopPassManager LPM;
+    for (auto &C : LateLoopOptimizationsEPCallbacks)
+      C(LPM, Level);
+    if (!LPM.isEmpty()) {
+      MPM.addPass(createModuleToFunctionPassAdaptor(
+          createFunctionToLoopPassAdaptor(std::move(LPM))));
+    }
+  }
+  if (!LoopOptimizerEndEPCallbacks.empty()) {
+    LoopPassManager LPM;
+    for (auto &C : LoopOptimizerEndEPCallbacks)
+      C(LPM, Level);
+    if (!LPM.isEmpty()) {
+      MPM.addPass(createModuleToFunctionPassAdaptor(
+          createFunctionToLoopPassAdaptor(std::move(LPM))));
+    }
+  }
+  if (!ScalarOptimizerLateEPCallbacks.empty()) {
+    FunctionPassManager FPM;
+    for (auto &C : ScalarOptimizerLateEPCallbacks)
+      C(FPM, Level);
+    if (!FPM.isEmpty())
+      MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+  }
+  if (!VectorizerStartEPCallbacks.empty()) {
+    FunctionPassManager FPM;
+    for (auto &C : VectorizerStartEPCallbacks)
+      C(FPM, Level);
+    if (!FPM.isEmpty())
+      MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+  }
+
+  MPM.addPass(createModuleToFunctionPassAdaptor(CoroEarlyPass()));
+  CGSCCPassManager CGPM;
+  CGPM.addPass(CoroSplitPass());
+  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
+  MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
+
+  for (auto &C : OptimizerLastEPCallbacks)
+    C(MPM, Level);
+
+  if (LTOPreLink)
+    addRequiredLTOPreLinkPasses(MPM);
+
+  return MPM;
+}
+
+AAManager PassBuilder::buildDefaultAAPipeline() {
+  AAManager AA;
+
+  // The order in which these are registered determines their priority when
+  // being queried.
+
+  // First we register the basic alias analysis that provides the majority of
+  // per-function local AA logic. This is a stateless, on-demand local set of
+  // AA techniques.
+  AA.registerFunctionAnalysis<BasicAA>();
+
+  // Next we query fast, specialized alias analyses that wrap IR-embedded
+  // information about aliasing.
+  AA.registerFunctionAnalysis<ScopedNoAliasAA>();
+  AA.registerFunctionAnalysis<TypeBasedAA>();
+
+  // Add support for querying global aliasing information when available.
+  // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
+  // analysis, all that the `AAManager` can do is query for any *cached*
+  // results from `GlobalsAA` through a readonly proxy.
+  AA.registerModuleAnalysis<GlobalsAA>();
+
+  // Add target-specific alias analyses.
+  if (TM)
+    TM->registerDefaultAliasAnalyses(AA);
+
+  return AA;
+}

diff  --git a/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn
index ff5238b3e655c..019a353c69caa 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn
@@ -17,8 +17,10 @@ static_library("Passes") {
     "//llvm/lib/Transforms/Vectorize",
   ]
   sources = [
+    "OptimizationLevel.cpp",
     "PassBuilder.cpp",
     "PassBuilderBindings.cpp",
+    "PassBuilderPipelines.cpp",
     "PassPlugin.cpp",
     "StandardInstrumentations.cpp",
   ]


        


More information about the llvm-commits mailing list