[llvm] 88d8f10 - [PassManager] add helper function to hold set of vector passes (2nd try)
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Mon May 10 11:44:41 PDT 2021
Author: Sanjay Patel
Date: 2021-05-10T14:43:00-04:00
New Revision: 88d8f10baf30b0df18eb542c426afc29b69f1313
URL: https://github.com/llvm/llvm-project/commit/88d8f10baf30b0df18eb542c426afc29b69f1313
DIFF: https://github.com/llvm/llvm-project/commit/88d8f10baf30b0df18eb542c426afc29b69f1313.diff
LOG: [PassManager] add helper function to hold set of vector passes (2nd try)
This is a better no-functional-change-intended version than the 1st attempt.
As noted in D102002, there were at least 2 diffs that went
unchecked in the pass manager regression tests: different pass
parameters (SimplifyCFG) and an extension point/callback.
Those should be lifted from the original code blocks correctly
now.
Added:
Modified:
llvm/include/llvm/Passes/PassBuilder.h
llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
llvm/lib/Passes/PassBuilder.cpp
llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index 339a2b7d42412..f8252b9583677 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -709,6 +709,9 @@ class PassBuilder {
void addRequiredLTOPreLinkPasses(ModulePassManager &MPM);
+ void addVectorPasses(OptimizationLevel Level, FunctionPassManager &FPM,
+ bool IsLTO);
+
static Optional<std::vector<PipelineElement>>
parsePipelineText(StringRef Text);
diff --git a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
index a9928c3f5a40d..46d9bee2e10f0 100644
--- a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -218,7 +218,8 @@ class PassManagerBuilder {
void addLateLTOOptimizationPasses(legacy::PassManagerBase &PM);
void addPGOInstrPasses(legacy::PassManagerBase &MPM, bool IsCS);
void addFunctionSimplificationPasses(legacy::PassManagerBase &MPM);
-
+ void addVectorPasses(legacy::PassManagerBase &PM, bool IsLTO);
+
public:
/// populateFunctionPassManager - This fills in the function pass manager,
/// which is expected to be run on each function immediately as it is
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index e6554f6106ed7..6c2315cb65a35 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1201,6 +1201,127 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
return MPM;
}
+/// TODO: Should LTO cause any differences to this set of passes?
+void PassBuilder::addVectorPasses(OptimizationLevel Level,
+ FunctionPassManager &FPM, bool IsLTO) {
+ FPM.addPass(LoopVectorizePass(
+ LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
+
+ if (IsLTO) {
+ // The vectorizer may have significantly shortened a loop body; unroll
+ // again. Unroll small loops to hide loop backedge latency and saturate any
+ // parallel execution resources of an out-of-order processor. We also then
+ // need to clean up redundancies and loop invariant code.
+ // FIXME: It would be really good to use a loop-integrated instruction
+ // combiner for cleanup here so that the unrolling and LICM can be pipelined
+ // across the loop nests.
+ // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
+ if (EnableUnrollAndJam && PTO.LoopUnrolling)
+ FPM.addPass(LoopUnrollAndJamPass(Level.getSpeedupLevel()));
+ FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
+ Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
+ PTO.ForgetAllSCEVInLoopUnroll)));
+ FPM.addPass(WarnMissedTransformationsPass());
+ }
+
+ if (!IsLTO) {
+ // Eliminate loads by forwarding stores from the previous iteration to loads
+ // of the current iteration.
+ FPM.addPass(LoopLoadEliminationPass());
+ }
+ // Cleanup after the loop optimization passes.
+ FPM.addPass(InstCombinePass());
+
+ if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
+ // At higher optimization levels, try to clean up any runtime overlap and
+ // alignment checks inserted by the vectorizer. We want to track correlated
+ // runtime checks for two inner loops in the same outer loop, fold any
+ // common computations, hoist loop-invariant aspects out of any outer loop,
+ // and unswitch the runtime checks if possible. Once hoisted, we may have
+ // dead (or speculatable) control flows or more combining opportunities.
+ FPM.addPass(EarlyCSEPass());
+ FPM.addPass(CorrelatedValuePropagationPass());
+ FPM.addPass(InstCombinePass());
+ LoopPassManager LPM;
+ LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+ LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
+ OptimizationLevel::O3));
+ FPM.addPass(
+ RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+ FPM.addPass(createFunctionToLoopPassAdaptor(
+ std::move(LPM), EnableMSSALoopDependency,
+ /*UseBlockFrequencyInfo=*/true));
+ FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(InstCombinePass());
+ }
+
+ if (IsLTO) {
+ FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true)));
+ } else {
+ // Now that we've formed fast to execute loop structures, we do further
+ // optimizations. These are run afterward as they might block doing complex
+ // analyses and transforms such as what are needed for loop vectorization.
+
+ // Cleanup after loop vectorization, etc. Simplification passes like CVP and
+ // GVN, loop transforms, and others have already run, so it's now better to
+ // convert to more optimized IR using more aggressive simplify CFG options.
+ // The extra sinking transform can create larger basic blocks, so do this
+ // before SLP vectorization.
+ FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
+ .forwardSwitchCondToPhi(true)
+ .convertSwitchToLookupTable(true)
+ .needCanonicalLoops(false)
+ .hoistCommonInsts(true)
+ .sinkCommonInsts(true)));
+ }
+ if (IsLTO) {
+ FPM.addPass(SCCPPass());
+ FPM.addPass(InstCombinePass());
+ FPM.addPass(BDCEPass());
+ }
+
+ // Optimize parallel scalar instruction chains into SIMD instructions.
+ if (PTO.SLPVectorization) {
+ FPM.addPass(SLPVectorizerPass());
+ if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
+ FPM.addPass(EarlyCSEPass());
+ }
+ }
+ // Enhance/cleanup vector code.
+ FPM.addPass(VectorCombinePass());
+
+ if (!IsLTO) {
+ FPM.addPass(InstCombinePass());
+ // Unroll small loops to hide loop backedge latency and saturate any
+ // parallel execution resources of an out-of-order processor. We also then
+ // need to clean up redundancies and loop invariant code.
+ // FIXME: It would be really good to use a loop-integrated instruction
+ // combiner for cleanup here so that the unrolling and LICM can be pipelined
+ // across the loop nests.
+ // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
+ if (EnableUnrollAndJam && PTO.LoopUnrolling) {
+ FPM.addPass(LoopUnrollAndJamPass(Level.getSpeedupLevel()));
+ }
+ FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
+ Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
+ PTO.ForgetAllSCEVInLoopUnroll)));
+ FPM.addPass(WarnMissedTransformationsPass());
+ FPM.addPass(InstCombinePass());
+ FPM.addPass(
+ RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+ FPM.addPass(createFunctionToLoopPassAdaptor(
+ LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+ EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true));
+ }
+
+ // Now that we've vectorized and unrolled loops, we may have more refined
+ // alignment information, try to re-derive it here.
+ FPM.addPass(AlignmentFromAssumptionsPass());
+
+ if (IsLTO)
+ FPM.addPass(InstCombinePass());
+}
+
ModulePassManager
PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
bool LTOPreLink) {
@@ -1295,91 +1416,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
// from the TargetLibraryInfo.
OptimizePM.addPass(InjectTLIMappings());
- // Now run the core loop vectorizer.
- OptimizePM.addPass(LoopVectorizePass(
- LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
-
- // Eliminate loads by forwarding stores from the previous iteration to loads
- // of the current iteration.
- OptimizePM.addPass(LoopLoadEliminationPass());
-
- // Cleanup after the loop optimization passes.
- OptimizePM.addPass(InstCombinePass());
-
- if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
- // At higher optimization levels, try to clean up any runtime overlap and
- // alignment checks inserted by the vectorizer. We want to track correlated
- // runtime checks for two inner loops in the same outer loop, fold any
- // common computations, hoist loop-invariant aspects out of any outer loop,
- // and unswitch the runtime checks if possible. Once hoisted, we may have
- // dead (or speculatable) control flows or more combining opportunities.
- OptimizePM.addPass(EarlyCSEPass());
- OptimizePM.addPass(CorrelatedValuePropagationPass());
- OptimizePM.addPass(InstCombinePass());
- LoopPassManager LPM;
- LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
- LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
- OptimizationLevel::O3));
- OptimizePM.addPass(
- RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
- OptimizePM.addPass(createFunctionToLoopPassAdaptor(
- std::move(LPM), EnableMSSALoopDependency,
- /*UseBlockFrequencyInfo=*/true));
- OptimizePM.addPass(SimplifyCFGPass());
- OptimizePM.addPass(InstCombinePass());
- }
-
- // Now that we've formed fast to execute loop structures, we do further
- // optimizations. These are run afterward as they might block doing complex
- // analyses and transforms such as what are needed for loop vectorization.
-
- // Cleanup after loop vectorization, etc. Simplification passes like CVP and
- // GVN, loop transforms, and others have already run, so it's now better to
- // convert to more optimized IR using more aggressive simplify CFG options.
- // The extra sinking transform can create larger basic blocks, so do this
- // before SLP vectorization.
- OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
- .forwardSwitchCondToPhi(true)
- .convertSwitchToLookupTable(true)
- .needCanonicalLoops(false)
- .hoistCommonInsts(true)
- .sinkCommonInsts(true)));
-
- // Optimize parallel scalar instruction chains into SIMD instructions.
- if (PTO.SLPVectorization) {
- OptimizePM.addPass(SLPVectorizerPass());
- if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
- OptimizePM.addPass(EarlyCSEPass());
- }
- }
-
- // Enhance/cleanup vector code.
- OptimizePM.addPass(VectorCombinePass());
- OptimizePM.addPass(InstCombinePass());
-
- // Unroll small loops to hide loop backedge latency and saturate any parallel
- // execution resources of an out-of-order processor. We also then need to
- // clean up redundancies and loop invariant code.
- // FIXME: It would be really good to use a loop-integrated instruction
- // combiner for cleanup here so that the unrolling and LICM can be pipelined
- // across the loop nests.
- // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
- if (EnableUnrollAndJam && PTO.LoopUnrolling) {
- OptimizePM.addPass(LoopUnrollAndJamPass(Level.getSpeedupLevel()));
- }
- OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions(
- Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
- PTO.ForgetAllSCEVInLoopUnroll)));
- OptimizePM.addPass(WarnMissedTransformationsPass());
- OptimizePM.addPass(InstCombinePass());
- OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
- OptimizePM.addPass(createFunctionToLoopPassAdaptor(
- LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
- EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true));
-
- // Now that we've vectorized and unrolled loops, we may have more refined
- // alignment information, try to re-derive it here.
- OptimizePM.addPass(AlignmentFromAssumptionsPass());
+ addVectorPasses(Level, OptimizePM, /* IsLTO */ false);
// Split out cold code. Splitting is done late to avoid hiding context from
// other optimizations and inadvertently regressing performance. The tradeoff
@@ -1825,39 +1862,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
MainFPM.addPass(LoopDistributePass());
- MainFPM.addPass(LoopVectorizePass(
- LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
- // The vectorizer may have significantly shortened a loop body; unroll again.
- MainFPM.addPass(LoopUnrollPass(LoopUnrollOptions(
- Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
- PTO.ForgetAllSCEVInLoopUnroll)));
-
- MainFPM.addPass(WarnMissedTransformationsPass());
-
- MainFPM.addPass(InstCombinePass());
- MainFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true)));
- MainFPM.addPass(SCCPPass());
- MainFPM.addPass(InstCombinePass());
- MainFPM.addPass(BDCEPass());
-
- // More scalar chains could be vectorized due to more alias information
- if (PTO.SLPVectorization) {
- MainFPM.addPass(SLPVectorizerPass());
- if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
- MainFPM.addPass(EarlyCSEPass());
- }
- }
-
- MainFPM.addPass(VectorCombinePass()); // Clean up partial vectorization.
-
- // After vectorization, assume intrinsics may tell us more about pointer
- // alignments.
- MainFPM.addPass(AlignmentFromAssumptionsPass());
- // FIXME: Conditionally run LoadCombine here, after it's ported
- // (in case we still have this pass, given its questionable usefulness).
+ addVectorPasses(Level, MainFPM, /* IsLTO */ true);
- MainFPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(MainFPM, Level);
MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM)));
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 87731e5eb1207..bdd2c2c4289a5 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -523,6 +523,124 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createControlHeightReductionLegacyPass());
}
+/// FIXME: Should LTO cause any differences to this set of passes?
+void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
+ bool IsLTO) {
+ PM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize));
+
+ if (IsLTO) {
+ // The vectorizer may have significantly shortened a loop body; unroll
+ // again. Unroll small loops to hide loop backedge latency and saturate any
+ // parallel execution resources of an out-of-order processor. We also then
+ // need to clean up redundancies and loop invariant code.
+ // FIXME: It would be really good to use a loop-integrated instruction
+ // combiner for cleanup here so that the unrolling and LICM can be pipelined
+ // across the loop nests.
+ // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
+ if (EnableUnrollAndJam && !DisableUnrollLoops)
+ PM.add(createLoopUnrollAndJamPass(OptLevel));
+ PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
+ ForgetAllSCEVInLoopUnroll));
+ PM.add(createWarnMissedTransformationsPass());
+ }
+
+ if (!IsLTO) {
+ // Eliminate loads by forwarding stores from the previous iteration to loads
+ // of the current iteration.
+ PM.add(createLoopLoadEliminationPass());
+ }
+ // Cleanup after the loop optimization passes.
+ PM.add(createInstructionCombiningPass());
+
+ if (OptLevel > 1 && ExtraVectorizerPasses) {
+ // At higher optimization levels, try to clean up any runtime overlap and
+ // alignment checks inserted by the vectorizer. We want to track correlated
+ // runtime checks for two inner loops in the same outer loop, fold any
+ // common computations, hoist loop-invariant aspects out of any outer loop,
+ // and unswitch the runtime checks if possible. Once hoisted, we may have
+ // dead (or speculatable) control flows or more combining opportunities.
+ PM.add(createEarlyCSEPass());
+ PM.add(createCorrelatedValuePropagationPass());
+ PM.add(createInstructionCombiningPass());
+ PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ PM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
+ PM.add(createCFGSimplificationPass());
+ PM.add(createInstructionCombiningPass());
+ }
+
+ if (IsLTO) {
+ PM.add(createCFGSimplificationPass(SimplifyCFGOptions() // if-convert
+ .hoistCommonInsts(true)));
+ } else {
+ // Now that we've formed fast to execute loop structures, we do further
+ // optimizations. These are run afterward as they might block doing complex
+ // analyses and transforms such as what are needed for loop vectorization.
+
+ // Cleanup after loop vectorization, etc. Simplification passes like CVP and
+ // GVN, loop transforms, and others have already run, so it's now better to
+ // convert to more optimized IR using more aggressive simplify CFG options.
+ // The extra sinking transform can create larger basic blocks, so do this
+ // before SLP vectorization.
+ PM.add(createCFGSimplificationPass(SimplifyCFGOptions()
+ .forwardSwitchCondToPhi(true)
+ .convertSwitchToLookupTable(true)
+ .needCanonicalLoops(false)
+ .hoistCommonInsts(true)
+ .sinkCommonInsts(true)));
+ }
+ if (IsLTO) {
+ PM.add(createSCCPPass()); // Propagate exposed constants
+ PM.add(createInstructionCombiningPass()); // Clean up again
+ PM.add(createBitTrackingDCEPass());
+ }
+
+ // Optimize parallel scalar instruction chains into SIMD instructions.
+ if (SLPVectorize) {
+ PM.add(createSLPVectorizerPass());
+ if (OptLevel > 1 && ExtraVectorizerPasses)
+ PM.add(createEarlyCSEPass());
+ }
+
+ // Enhance/cleanup vector code.
+ PM.add(createVectorCombinePass());
+
+ if (!IsLTO) {
+ addExtensionsToPM(EP_Peephole, PM);
+ PM.add(createInstructionCombiningPass());
+
+ if (EnableUnrollAndJam && !DisableUnrollLoops) {
+ // Unroll and Jam. We do this before unroll but need to be in a separate
+ // loop pass manager in order for the outer loop to be processed by
+ // unroll and jam before the inner loop is unrolled.
+ PM.add(createLoopUnrollAndJamPass(OptLevel));
+ }
+
+ // Unroll small loops
+ PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
+ ForgetAllSCEVInLoopUnroll));
+
+ if (!DisableUnrollLoops) {
+ // LoopUnroll may generate some redundency to cleanup.
+ PM.add(createInstructionCombiningPass());
+
+ // Runtime unrolling will introduce runtime check in loop prologue. If the
+ // unrolled loop is a inner loop, then the prologue will be inside the
+ // outer loop. LICM pass can help to promote the runtime check out if the
+ // checked value is loop invariant.
+ PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ }
+
+ PM.add(createWarnMissedTransformationsPass());
+ }
+
+ // After vectorization and unrolling, assume intrinsics may tell us more
+ // about pointer alignments.
+ PM.add(createAlignmentFromAssumptionsPass());
+
+ if (IsLTO)
+ PM.add(createInstructionCombiningPass());
+}
+
void PassManagerBuilder::populateModulePassManager(
legacy::PassManagerBase &MPM) {
// Whether this is a default or *LTO pre-link pipeline. The FullLTO post-link
@@ -794,86 +912,7 @@ void PassManagerBuilder::populateModulePassManager(
// llvm.loop.distribute=true or when -enable-loop-distribute is specified.
MPM.add(createLoopDistributePass());
- MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize));
-
- // Eliminate loads by forwarding stores from the previous iteration to loads
- // of the current iteration.
- MPM.add(createLoopLoadEliminationPass());
-
- // FIXME: Because of #pragma vectorize enable, the passes below are always
- // inserted in the pipeline, even when the vectorizer doesn't run (ex. when
- // on -O1 and no #pragma is found). Would be good to have these two passes
- // as function calls, so that we can only pass them when the vectorizer
- // changed the code.
- MPM.add(createInstructionCombiningPass());
- if (OptLevel > 1 && ExtraVectorizerPasses) {
- // At higher optimization levels, try to clean up any runtime overlap and
- // alignment checks inserted by the vectorizer. We want to track correllated
- // runtime checks for two inner loops in the same outer loop, fold any
- // common computations, hoist loop-invariant aspects out of any outer loop,
- // and unswitch the runtime checks if possible. Once hoisted, we may have
- // dead (or speculatable) control flows or more combining opportunities.
- MPM.add(createEarlyCSEPass());
- MPM.add(createCorrelatedValuePropagationPass());
- MPM.add(createInstructionCombiningPass());
- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
- MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
- MPM.add(createCFGSimplificationPass());
- MPM.add(createInstructionCombiningPass());
- }
-
- // Cleanup after loop vectorization, etc. Simplification passes like CVP and
- // GVN, loop transforms, and others have already run, so it's now better to
- // convert to more optimized IR using more aggressive simplify CFG options.
- // The extra sinking transform can create larger basic blocks, so do this
- // before SLP vectorization.
- MPM.add(createCFGSimplificationPass(SimplifyCFGOptions()
- .forwardSwitchCondToPhi(true)
- .convertSwitchToLookupTable(true)
- .needCanonicalLoops(false)
- .hoistCommonInsts(true)
- .sinkCommonInsts(true)));
-
- if (SLPVectorize) {
- MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
- if (OptLevel > 1 && ExtraVectorizerPasses) {
- MPM.add(createEarlyCSEPass());
- }
- }
-
- // Enhance/cleanup vector code.
- MPM.add(createVectorCombinePass());
-
- addExtensionsToPM(EP_Peephole, MPM);
- MPM.add(createInstructionCombiningPass());
-
- if (EnableUnrollAndJam && !DisableUnrollLoops) {
- // Unroll and Jam. We do this before unroll but need to be in a separate
- // loop pass manager in order for the outer loop to be processed by
- // unroll and jam before the inner loop is unrolled.
- MPM.add(createLoopUnrollAndJamPass(OptLevel));
- }
-
- // Unroll small loops
- MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
- ForgetAllSCEVInLoopUnroll));
-
- if (!DisableUnrollLoops) {
- // LoopUnroll may generate some redundency to cleanup.
- MPM.add(createInstructionCombiningPass());
-
- // Runtime unrolling will introduce runtime check in loop prologue. If the
- // unrolled loop is a inner loop, then the prologue will be inside the
- // outer loop. LICM pass can help to promote the runtime check out if the
- // checked value is loop invariant.
- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
- }
-
- MPM.add(createWarnMissedTransformationsPass());
-
- // After vectorization and unrolling, assume intrinsics may tell us more
- // about pointer alignments.
- MPM.add(createAlignmentFromAssumptionsPass());
+ addVectorPasses(MPM, /* IsLTO */ false);
// FIXME: We shouldn't bother with this anymore.
MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
@@ -1083,35 +1122,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
ForgetAllSCEVInLoopUnroll));
PM.add(createLoopDistributePass());
- PM.add(createLoopVectorizePass(true, !LoopVectorize));
- // The vectorizer may have significantly shortened a loop body; unroll again.
- PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
- ForgetAllSCEVInLoopUnroll));
-
- PM.add(createWarnMissedTransformationsPass());
-
- // Now that we've optimized loops (in particular loop induction variables),
- // we may have exposed more scalar opportunities. Run parts of the scalar
- // optimizer again at this point.
- PM.add(createInstructionCombiningPass()); // Initial cleanup
- PM.add(createCFGSimplificationPass(SimplifyCFGOptions() // if-convert
- .hoistCommonInsts(true)));
- PM.add(createSCCPPass()); // Propagate exposed constants
- PM.add(createInstructionCombiningPass()); // Clean up again
- PM.add(createBitTrackingDCEPass());
-
- // More scalar chains could be vectorized due to more alias information
- if (SLPVectorize)
- PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
-
- PM.add(createVectorCombinePass()); // Clean up partial vectorization.
-
- // After vectorization, assume intrinsics may tell us more about pointer
- // alignments.
- PM.add(createAlignmentFromAssumptionsPass());
- // Cleanup and simplify the code after the scalar optimizations.
- PM.add(createInstructionCombiningPass());
+ addVectorPasses(PM, /* IsLTO */ true);
+
addExtensionsToPM(EP_Peephole, PM);
PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true));
More information about the llvm-commits
mailing list