[llvm] acea6e9 - [Passes] Only run extra vector passes if loops have been vectorized.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 10 03:43:03 PST 2021


Author: Florian Hahn
Date: 2021-12-10T11:42:45Z
New Revision: acea6e9cfa4c4a0e8678c7cacede3d0c965dfc36

URL: https://github.com/llvm/llvm-project/commit/acea6e9cfa4c4a0e8678c7cacede3d0c965dfc36
DIFF: https://github.com/llvm/llvm-project/commit/acea6e9cfa4c4a0e8678c7cacede3d0c965dfc36.diff

LOG: [Passes] Only run extra vector passes if loops have been vectorized.

This patch uses a similar trick as in D113947 to only run the extra
passes after vectorization on functions where loops have been
vectorized.

The reason for running the 'extra vector passes' is
simplification/unswitching of the runtime checks created by LV, there
should be no need to run them if nothing got vectorized

To do that, a new dummy analysis ShouldRunExtraVectorPasses has been
added. If loops have been vectorized for a function, LV will cache the
analysis. At the moment it uses MadeCFGChanges as proxy for loop
vectorized, which isn't perfect (it could be too aggressive, e.g.
because no runtime checks have been added), but should be good enough
for now.

The extra passes are now managed by a new FunctionPassManager that
runs its passes only if ShouldRunExtraVectorPasses has been cached.

Without this patch, `-extra-vectorizer-passes` has the following
compile-time impact:

NewPM-O3: +4.86%
NewPM-ReleaseThinLTO: +3.56%
NewPM-ReleaseLTO-g: +7.17%

http://llvm-compile-time-tracker.com/compare.php?from=ead3979a92fc33add4710c4510d6906260dcb4ad&to=c292da649e2c6e88a31e702fdc474727d09c72bc&stat=instructions

With this patch, that gets reduced to

NewPM-O3: +1.43%
NewPM-ReleaseThinLTO: +1.00%
NewPM-ReleaseLTO-g: +1.58%

http://llvm-compile-time-tracker.com/compare.php?from=ead3979a92fc33add4710c4510d6906260dcb4ad&to=e67d86b57810011cf285eb9aa1944781be6096f0&stat=instructions

It is probably still too high to enable by default, but much better.

Reviewed By: aeubanks

Differential Revision: https://reviews.llvm.org/D115052

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
    llvm/lib/Passes/PassBuilderPipelines.cpp
    llvm/lib/Passes/PassRegistry.def
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/test/Other/opt-pipeline-vector-passes.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
index d105496ad47f1..463b5a0580528 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
@@ -80,6 +80,38 @@ class TargetTransformInfo;
 extern cl::opt<bool> EnableLoopInterleaving;
 extern cl::opt<bool> EnableLoopVectorization;
 
+/// A marker to determine if extra passes after loop vectorization should be
+/// run.
+struct ShouldRunExtraVectorPasses
+    : public AnalysisInfoMixin<ShouldRunExtraVectorPasses> {
+  static AnalysisKey Key;
+  struct Result {
+    bool invalidate(Function &F, const PreservedAnalyses &PA,
+                    FunctionAnalysisManager::Invalidator &) {
+      // Check whether the analysis has been explicitly invalidated. Otherwise,
+      // it remains preserved.
+      auto PAC = PA.getChecker<ShouldRunExtraVectorPasses>();
+      return !PAC.preservedWhenStateless();
+    }
+  };
+
+  Result run(Function &F, FunctionAnalysisManager &FAM) { return Result(); }
+};
+
+/// A pass manager to run a set of extra function simplification passes after
+/// vectorization, if requested. LoopVectorize caches the
+/// ShouldRunExtraVectorPasses analysis to request extra simplifications, if
+/// they could be beneficial.
+struct ExtraVectorPassManager : public FunctionPassManager {
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
+    auto PA = PreservedAnalyses::all();
+    if (AM.getCachedResult<ShouldRunExtraVectorPasses>(F))
+      PA.intersect(FunctionPassManager::run(F, AM));
+    PA.abandon<ShouldRunExtraVectorPasses>();
+    return PA;
+  }
+};
+
 struct LoopVectorizeOptions {
   /// If false, consider all loops for interleaving.
   /// If true, only loops that explicitly request interleaving are considered.

diff  --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index dbfa2a4f90e19..a6a36ff254029 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -995,26 +995,28 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
   FPM.addPass(InstCombinePass());
 
   if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
+    ExtraVectorPassManager ExtraPasses;
     // At higher optimization levels, try to clean up any runtime overlap and
     // alignment checks inserted by the vectorizer. We want to track correlated
     // runtime checks for two inner loops in the same outer loop, fold any
     // common computations, hoist loop-invariant aspects out of any outer loop,
     // and unswitch the runtime checks if possible. Once hoisted, we may have
     // dead (or speculatable) control flows or more combining opportunities.
-    FPM.addPass(EarlyCSEPass());
-    FPM.addPass(CorrelatedValuePropagationPass());
-    FPM.addPass(InstCombinePass());
+    ExtraPasses.addPass(EarlyCSEPass());
+    ExtraPasses.addPass(CorrelatedValuePropagationPass());
+    ExtraPasses.addPass(InstCombinePass());
     LoopPassManager LPM;
     LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
     LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
                                        OptimizationLevel::O3));
-    FPM.addPass(
+    ExtraPasses.addPass(
         RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
-    FPM.addPass(
+    ExtraPasses.addPass(
         createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
                                         /*UseBlockFrequencyInfo=*/true));
-    FPM.addPass(SimplifyCFGPass());
-    FPM.addPass(InstCombinePass());
+    ExtraPasses.addPass(SimplifyCFGPass());
+    ExtraPasses.addPass(InstCombinePass());
+    FPM.addPass(std::move(ExtraPasses));
   }
 
   // Now that we've formed fast to execute loop structures, we do further

diff  --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 3ad11acb773a8..74613a7fcce06 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -203,6 +203,7 @@ FUNCTION_ANALYSIS("no-op-function", NoOpFunctionAnalysis())
 FUNCTION_ANALYSIS("opt-remark-emit", OptimizationRemarkEmitterAnalysis())
 FUNCTION_ANALYSIS("scalar-evolution", ScalarEvolutionAnalysis())
 FUNCTION_ANALYSIS("should-not-run-function-passes", ShouldNotRunFunctionPassesAnalysis())
+FUNCTION_ANALYSIS("should-run-extra-vector-passes", ShouldRunExtraVectorPasses())
 FUNCTION_ANALYSIS("stack-safety-local", StackSafetyAnalysis())
 FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
 FUNCTION_ANALYSIS("targetir",

diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 978b642d41413..410bf9881285d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -428,6 +428,8 @@ class GeneratedRTChecks;
 
 namespace llvm {
 
+AnalysisKey ShouldRunExtraVectorPasses::Key;
+
 /// InnerLoopVectorizer vectorizes loops which contain only one basic
 /// block to a specified vectorization factor (VF).
 /// This class performs the widening of scalars into vectors, or multiple
@@ -10746,8 +10748,17 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
       PA.preserve<LoopAnalysis>();
       PA.preserve<DominatorTreeAnalysis>();
     }
-    if (!Result.MadeCFGChange)
+
+    if (Result.MadeCFGChange) {
+      // Making CFG changes likely means a loop got vectorized. Indicate that
+      // extra simplification passes should be run.
+      // TODO: MadeCFGChanges is not a prefect proxy. Extra passes should only
+      // be run if runtime checks have been added.
+      AM.getResult<ShouldRunExtraVectorPasses>(F);
+      PA.preserve<ShouldRunExtraVectorPasses>();
+    } else {
       PA.preserveSet<CFGAnalyses>();
+    }
     return PA;
 }
 

diff  --git a/llvm/test/Other/opt-pipeline-vector-passes.ll b/llvm/test/Other/opt-pipeline-vector-passes.ll
index 3d475703e6e28..7801cde24462e 100644
--- a/llvm/test/Other/opt-pipeline-vector-passes.ll
+++ b/llvm/test/Other/opt-pipeline-vector-passes.ll
@@ -2,6 +2,9 @@
 ; RUN: opt -disable-verify -debug-pass-manager -passes='default<O2>' -force-vector-width=4 -S %s 2>&1 | FileCheck %s --check-prefixes=O2
 ; RUN: opt -disable-verify -debug-pass-manager -passes='default<O2>' -force-vector-width=4 -extra-vectorizer-passes -S %s 2>&1 | FileCheck %s --check-prefixes=O2_EXTRA
 
+; When the loop doesn't get vectorized, no extra vector passes should run.
+; RUN: opt -disable-verify -debug-pass-manager -passes='default<O2>' -force-vector-width=0 -extra-vectorizer-passes -S %s 2>&1 | FileCheck %s --check-prefixes=O2
+
 ; REQUIRES: asserts
 
 ; The loop vectorizer still runs at both -O1/-O2 even with the


        


More information about the llvm-commits mailing list