[llvm] 7acfda6 - [llvm] Make new pass manager's OptimizationLevel a class

Mircea Trofin via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 16 09:01:08 PST 2020


Author: Mircea Trofin
Date: 2020-01-16T09:00:56-08:00
New Revision: 7acfda633f1378344efde22bfed242dd56c26cdd

URL: https://github.com/llvm/llvm-project/commit/7acfda633f1378344efde22bfed242dd56c26cdd
DIFF: https://github.com/llvm/llvm-project/commit/7acfda633f1378344efde22bfed242dd56c26cdd.diff

LOG: [llvm] Make new pass manager's OptimizationLevel a class

Summary:
The old pass manager separated the speed optimization and size optimization
levels into two unsigned values. Coalescing both into a single enum in the new
pass manager may lead to unintentional casts and comparisons.

In particular, compared with how the loop unroll passes were constructed
under the old pass manager, Os/Oz in the new pass manager were treated
just like O3, likely unintentionally.
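
To illustrate the hazard, here is a minimal standalone sketch. The enum
ordering and the "OptLevel > 2 ? 300 : 150" threshold selection are taken
from the diff below; the pickUnrollThreshold helper is hypothetical and
merely stands in for gatherUnrollingPreferences:

    // Old encoding: Os and Oz are enumerated after O3, so they compare
    // greater than O3 in any raw integer comparison.
    enum OptimizationLevel { O0, O1, O2, O3, Os, Oz };

    // Hypothetical stand-in for the threshold selection in
    // gatherUnrollingPreferences.
    unsigned pickUnrollThreshold(OptimizationLevel Level) {
      // Passing the enum value straight through as "OptLevel" makes
      // Os (4) and Oz (5) satisfy OptLevel > 2 and select the aggressive
      // O3 threshold.
      unsigned OptLevel = Level;
      return OptLevel > 2 ? 300 : 150;
    }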

This change disallows raw comparisons between optimization levels, to
avoid such unintended effects. As a result, the O{s|z} behavior changes
for loop unrolling and loop unroll and jam, now matching O2 rather than O3.
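
With the class, the raw comparisons in PassBuilder become explicit queries;
a few representative forms, lifted from the diff below (a sketch, not the
full pipeline code):

    // "Level > O1" becomes a query on the speedup component.
    if (Level.getSpeedupLevel() > 1)
      FPM.addPass(SpeculativeExecutionPass());

    // Exact-level checks use operator== against the named constants.
    if (Level == PassBuilder::OptimizationLevel::O3)
      FPM.addPass(AggressiveInstCombinePass());

    // Size-oriented decisions use the dedicated predicate.
    if (!Level.isOptimizingForSize())
      FPM.addPass(LibCallsShrinkWrapPass());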

The change also parameterizes the threshold values used for loop
unrolling, primarily to aid testing.
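
The new tests drive these knobs directly through opt; for example (flags
taken from the RUN lines added below, with the input file name as a
placeholder):

    opt < loop.ll -S -passes="default<Os>" -unroll-runtime=true \
        -unroll-threshold-default=0 -unroll-threshold-aggressive=300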

Reviewers: tejohnson, davidxl

Reviewed By: tejohnson

Subscribers: zzheng, ychen, mehdi_amini, hiraditya, steven_wu, dexonsmith, dang, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D72547

Added: 
    llvm/test/Transforms/LoopUnroll/opt-levels.ll
    llvm/test/Transforms/LoopUnrollAndJam/opt-levels.ll

Modified: 
    clang/lib/CodeGen/BackendUtil.cpp
    llvm/include/llvm/Passes/PassBuilder.h
    llvm/lib/LTO/LTOBackend.cpp
    llvm/lib/Passes/PassBuilder.cpp
    llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp

Removed: 
    


################################################################################
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 0bfcab88a3a9..f379c103b694 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -924,7 +924,7 @@ static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) {
     llvm_unreachable("Invalid optimization level!");
 
   case 1:
-    return PassBuilder::O1;
+    return PassBuilder::OptimizationLevel::O1;
 
   case 2:
     switch (Opts.OptimizeSize) {
@@ -932,17 +932,17 @@ static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) {
       llvm_unreachable("Invalid optimization level for size!");
 
     case 0:
-      return PassBuilder::O2;
+      return PassBuilder::OptimizationLevel::O2;
 
     case 1:
-      return PassBuilder::Os;
+      return PassBuilder::OptimizationLevel::Os;
 
     case 2:
-      return PassBuilder::Oz;
+      return PassBuilder::OptimizationLevel::Oz;
     }
 
   case 3:
-    return PassBuilder::O3;
+    return PassBuilder::OptimizationLevel::O3;
   }
 }
 

diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index e7db8fd421fe..828ac004117e 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -143,11 +143,26 @@ class PassBuilder {
   ///
   /// This enumerates the LLVM-provided high-level optimization levels. Each
   /// level has a specific goal and rationale.
-  enum OptimizationLevel {
+  class OptimizationLevel final {
+    unsigned SpeedLevel = 2;
+    unsigned SizeLevel = 0;
+    OptimizationLevel(unsigned SpeedLevel, unsigned SizeLevel)
+        : SpeedLevel(SpeedLevel), SizeLevel(SizeLevel) {
+      // Check that only valid combinations are passed.
+      assert(SpeedLevel <= 3 &&
+             "Optimization level for speed should be 0, 1, 2, or 3");
+      assert(SizeLevel <= 2 &&
+             "Optimization level for size should be 0, 1, or 2");
+      assert((SizeLevel == 0 || SpeedLevel == 2) &&
+             "Optimize for size should be encoded with speedup level == 2");
+    }
+
+  public:
+    OptimizationLevel() = default;
     /// Disable as many optimizations as possible. This doesn't completely
     /// disable the optimizer in all cases, for example always_inline functions
     /// can be required to be inlined for correctness.
-    O0,
+    static const OptimizationLevel O0;
 
     /// Optimize quickly without destroying debuggability.
     ///
@@ -161,10 +176,9 @@ class PassBuilder {
     ///
     /// As an example, complex loop transformations such as versioning,
     /// vectorization, or fusion don't make sense here due to the degree to
-    /// which the executed code differs from the source code, and the compile time
-    /// cost.
-    O1,
-
+    /// which the executed code differs from the source code, and the compile
+    /// time cost.
+    static const OptimizationLevel O1;
     /// Optimize for fast execution as much as possible without triggering
     /// significant incremental compile time or code size growth.
     ///
@@ -181,8 +195,7 @@ class PassBuilder {
     ///
     /// This is expected to be a good default optimization level for the vast
     /// majority of users.
-    O2,
-
+    static const OptimizationLevel O2;
     /// Optimize for fast execution as much as possible.
     ///
     /// This mode is significantly more aggressive in trading off compile time
@@ -197,8 +210,7 @@ class PassBuilder {
     /// order to make even significantly slower compile times at least scale
     /// reasonably. This does not preclude very substantial constant factor
     /// costs though.
-    O3,
-
+    static const OptimizationLevel O3;
     /// Similar to \c O2 but tries to optimize for small code size instead of
     /// fast execution without triggering significant incremental execution
     /// time slowdowns.
@@ -209,8 +221,7 @@ class PassBuilder {
     /// A consequence of the different core goal is that this should in general
     /// produce substantially smaller executables that still run in
     /// a reasonable amount of time.
-    Os,
-
+    static const OptimizationLevel Os;
     /// A very specialized mode that will optimize for code size at any and all
     /// costs.
     ///
@@ -218,7 +229,24 @@ class PassBuilder {
     /// any effort taken to reduce the size is worth it regardless of the
     /// execution time impact. You should expect this level to produce rather
     /// slow, but very small, code.
-    Oz
+    static const OptimizationLevel Oz;
+
+    bool isOptimizingForSpeed() const {
+      return SizeLevel == 0 && SpeedLevel > 0;
+    }
+
+    bool isOptimizingForSize() const { return SizeLevel > 0; }
+
+    bool operator==(const OptimizationLevel &Other) const {
+      return SizeLevel == Other.SizeLevel && SpeedLevel == Other.SpeedLevel;
+    }
+    bool operator!=(const OptimizationLevel &Other) const {
+      return SizeLevel != Other.SizeLevel || SpeedLevel != Other.SpeedLevel;
+    }
+
+    unsigned getSpeedupLevel() const { return SpeedLevel; }
+
+    unsigned getSizeLevel() const { return SizeLevel; }
   };
 
   explicit PassBuilder(TargetMachine *TM = nullptr,

diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index dcde7277b820..3cd84ad1dacd 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -203,16 +203,16 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
   default:
     llvm_unreachable("Invalid optimization level");
   case 0:
-    OL = PassBuilder::O0;
+    OL = PassBuilder::OptimizationLevel::O0;
     break;
   case 1:
-    OL = PassBuilder::O1;
+    OL = PassBuilder::OptimizationLevel::O1;
     break;
   case 2:
-    OL = PassBuilder::O2;
+    OL = PassBuilder::OptimizationLevel::O2;
     break;
   case 3:
-    OL = PassBuilder::O3;
+    OL = PassBuilder::OptimizationLevel::O3;
     break;
   }
 

diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 53b7db8689c4..a65ea0d9e370 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -244,20 +244,24 @@ extern cl::opt<bool> EnableOrderFileInstrumentation;
 
 extern cl::opt<bool> FlattenedProfileUsed;
 
-static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) {
-  switch (Level) {
-  case PassBuilder::O0:
-  case PassBuilder::O1:
-  case PassBuilder::O2:
-  case PassBuilder::O3:
-    return false;
-
-  case PassBuilder::Os:
-  case PassBuilder::Oz:
-    return true;
-  }
-  llvm_unreachable("Invalid optimization level!");
-}
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O0 = {
+    /*SpeedLevel*/ 0,
+    /*SizeLevel*/ 0};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O1 = {
+    /*SpeedLevel*/ 1,
+    /*SizeLevel*/ 0};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O2 = {
+    /*SpeedLevel*/ 2,
+    /*SizeLevel*/ 0};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O3 = {
+    /*SpeedLevel*/ 3,
+    /*SizeLevel*/ 0};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Os = {
+    /*SpeedLevel*/ 2,
+    /*SizeLevel*/ 1};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Oz = {
+    /*SpeedLevel*/ 2,
+    /*SizeLevel*/ 2};
 
 namespace {
 
@@ -396,7 +400,7 @@ FunctionPassManager
 PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
                                                  ThinLTOPhase Phase,
                                                  bool DebugLogging) {
-  assert(Level != O0 && "Must request optimizations!");
+  assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
   FunctionPassManager FPM(DebugLogging);
 
   // Form SSA out of local memory accesses after breaking apart aggregates into
@@ -407,7 +411,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
 
   // Hoisting of scalars and load expressions.
-  if (Level > O1) {
+  if (Level.getSpeedupLevel() > 1) {
     if (EnableGVNHoist)
       FPM.addPass(GVNHoistPass());
 
@@ -419,7 +423,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   }
 
   // Speculative execution if the target has divergent branches; otherwise nop.
-  if (Level > O1) {
+  if (Level.getSpeedupLevel() > 1) {
     FPM.addPass(SpeculativeExecutionPass());
 
     // Optimize based on known information about branches, and cleanup afterward.
@@ -427,11 +431,11 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
     FPM.addPass(CorrelatedValuePropagationPass());
   }
   FPM.addPass(SimplifyCFGPass());
-  if (Level == O3)
+  if (Level == OptimizationLevel::O3)
     FPM.addPass(AggressiveInstCombinePass());
   FPM.addPass(InstCombinePass());
 
-  if (!isOptimizingForSize(Level))
+  if (!Level.isOptimizingForSize())
     FPM.addPass(LibCallsShrinkWrapPass());
 
   invokePeepholeEPCallbacks(FPM, Level);
@@ -439,11 +443,11 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
   // using the size value profile. Don't perform this when optimizing for size.
   if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
-      !isOptimizingForSize(Level) && Level > O1)
+      (Level.getSpeedupLevel() > 1 && !Level.isOptimizingForSize()))
     FPM.addPass(PGOMemOPSizeOpt());
 
   // TODO: Investigate the cost/benefit of tail call elimination on debugging.
-  if (Level > O1)
+  if (Level.getSpeedupLevel() > 1)
     FPM.addPass(TailCallElimPass());
   FPM.addPass(SimplifyCFGPass());
 
@@ -470,7 +474,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   LPM1.addPass(LoopSimplifyCFGPass());
 
   // Rotate Loop - disable header duplication at -Oz
-  LPM1.addPass(LoopRotatePass(Level != Oz));
+  LPM1.addPass(LoopRotatePass(Level != OptimizationLevel::Oz));
   // TODO: Investigate promotion cap for O1.
   LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
   LPM1.addPass(SimpleLoopUnswitchPass());
@@ -487,7 +491,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   if ((Phase != ThinLTOPhase::PreLink || !PGOOpt ||
        PGOOpt->Action != PGOOptions::SampleUse) &&
       PTO.LoopUnrolling)
-    LPM2.addPass(LoopFullUnrollPass(Level, /*OnlyWhenForced=*/false,
+    LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
+                                    /*OnlyWhenForced=*/false,
                                     PTO.ForgetAllSCEVInLoopUnroll));
 
   for (auto &C : LoopOptimizerEndEPCallbacks)
@@ -510,7 +515,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   FPM.addPass(SROA());
 
   // Eliminate redundancies.
-  if (Level != O1) {
+  if (Level != OptimizationLevel::O1) {
     // These passes add substantial compile time so skip them at O1.
     FPM.addPass(MergedLoadStoreMotionPass());
     if (RunNewGVN)
@@ -539,7 +544,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
 
   // Re-consider control flow based optimizations after redundancy elimination,
   // redo DCE, etc.
-  if (Level > O1) {
+  if (Level.getSpeedupLevel() > 1) {
     FPM.addPass(JumpThreadingPass());
     FPM.addPass(CorrelatedValuePropagationPass());
     FPM.addPass(DSEPass());
@@ -559,7 +564,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   FPM.addPass(InstCombinePass());
   invokePeepholeEPCallbacks(FPM, Level);
 
-  if (EnableCHR && Level == O3 && PGOOpt &&
+  if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt &&
       (PGOOpt->Action == PGOOptions::IRUse ||
        PGOOpt->Action == PGOOptions::SampleUse))
     FPM.addPass(ControlHeightReductionPass());
@@ -572,13 +577,13 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
                                     bool RunProfileGen, bool IsCS,
                                     std::string ProfileFile,
                                     std::string ProfileRemappingFile) {
-  assert(Level != O0 && "Not expecting O0 here!");
+  assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
   // Generally running simplification passes and the inliner with an high
   // threshold results in smaller executables, but there may be cases where
   // the size grows, so let's be conservative here and skip this simplification
   // at -Os/Oz. We will not do this  inline for context sensistive PGO (when
   // IsCS is true).
-  if (!isOptimizingForSize(Level) && !IsCS) {
+  if (!Level.isOptimizingForSize() && !IsCS) {
     InlineParams IP;
 
     IP.DefaultThreshold = PreInlineThreshold;
@@ -663,10 +668,7 @@ void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM,
 
 static InlineParams
 getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) {
-  auto O3 = PassBuilder::O3;
-  unsigned OptLevel = Level > O3 ? 2 : Level;
-  unsigned SizeLevel = Level > O3 ? Level - O3 : 0;
-  return getInlineParams(OptLevel, SizeLevel);
+  return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
 }
 
 ModulePassManager
@@ -712,7 +714,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
   EarlyFPM.addPass(SROA());
   EarlyFPM.addPass(EarlyCSEPass());
   EarlyFPM.addPass(LowerExpectIntrinsicPass());
-  if (Level == O3)
+  if (Level == OptimizationLevel::O3)
     EarlyFPM.addPass(CallSiteSplittingPass());
 
   // In SamplePGO ThinLTO backend, we need instcombine before profile annotation
@@ -831,7 +833,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
 
   // When at O3 add argument promotion to the pass pipeline.
   // FIXME: It isn't at all clear why this should be limited to O3.
-  if (Level == O3)
+  if (Level == OptimizationLevel::O3)
     MainCGPipeline.addPass(ArgumentPromotionPass());
 
   // Lastly, add the core function simplification pipeline nested inside the
@@ -975,11 +977,11 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
   // across the loop nests.
   // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
   if (EnableUnrollAndJam && PTO.LoopUnrolling) {
-    OptimizePM.addPass(LoopUnrollAndJamPass(Level));
+    OptimizePM.addPass(LoopUnrollAndJamPass(Level.getSpeedupLevel()));
   }
-  OptimizePM.addPass(LoopUnrollPass(
-      LoopUnrollOptions(Level, /*OnlyWhenForced=*/!PTO.LoopUnrolling,
-                        PTO.ForgetAllSCEVInLoopUnroll)));
+  OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions(
+      Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
+      PTO.ForgetAllSCEVInLoopUnroll)));
   OptimizePM.addPass(WarnMissedTransformationsPass());
   OptimizePM.addPass(InstCombinePass());
   OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
@@ -1041,7 +1043,8 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
 ModulePassManager
 PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
                                            bool DebugLogging, bool LTOPreLink) {
-  assert(Level != O0 && "Must request optimizations for the default pipeline!");
+  assert(Level != OptimizationLevel::O0 &&
+         "Must request optimizations for the default pipeline!");
 
   ModulePassManager MPM(DebugLogging);
 
@@ -1068,7 +1071,8 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
 ModulePassManager
 PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level,
                                                 bool DebugLogging) {
-  assert(Level != O0 && "Must request optimizations for the default pipeline!");
+  assert(Level != OptimizationLevel::O0 &&
+         "Must request optimizations for the default pipeline!");
 
   ModulePassManager MPM(DebugLogging);
 
@@ -1129,7 +1133,7 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
     MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
   }
 
-  if (Level == O0)
+  if (Level == OptimizationLevel::O0)
     return MPM;
 
   // Force any function attributes we want the rest of the pipeline to observe.
@@ -1148,10 +1152,11 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
 ModulePassManager
 PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level,
                                             bool DebugLogging) {
-  assert(Level != O0 && "Must request optimizations for the default pipeline!");
+  assert(Level != OptimizationLevel::O0 &&
+         "Must request optimizations for the default pipeline!");
   // FIXME: We should use a customized pre-link pipeline!
   return buildPerModuleDefaultPipeline(Level, DebugLogging,
-                                       /* LTOPreLink */true);
+                                       /* LTOPreLink */ true);
 }
 
 ModulePassManager
@@ -1159,7 +1164,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
                                      ModuleSummaryIndex *ExportSummary) {
   ModulePassManager MPM(DebugLogging);
 
-  if (Level == O0) {
+  if (Level == OptimizationLevel::O0) {
     // The WPD and LowerTypeTest passes need to run at -O0 to lower type
     // metadata and intrinsics.
     MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
@@ -1188,7 +1193,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
   // libraries and other oracles.
   MPM.addPass(InferFunctionAttrsPass());
 
-  if (Level > 1) {
+  if (Level.getSpeedupLevel() > 1) {
     FunctionPassManager EarlyFPM(DebugLogging);
     EarlyFPM.addPass(CallSiteSplittingPass());
     MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
@@ -1226,7 +1231,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
   MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
 
   // Stop here at -O1.
-  if (Level == 1) {
+  if (Level == OptimizationLevel::O1) {
     // The LowerTypeTestsPass needs to run to lower type metadata and the
     // type.test intrinsics. The pass does nothing if CFI is disabled.
     MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
@@ -1251,7 +1256,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
   // function pointers.  When this happens, we often have to resolve varargs
   // calls, etc, so let instcombine do this.
   FunctionPassManager PeepholeFPM(DebugLogging);
-  if (Level == O3)
+  if (Level == OptimizationLevel::O3)
     PeepholeFPM.addPass(AggressiveInstCombinePass());
   PeepholeFPM.addPass(InstCombinePass());
   invokePeepholeEPCallbacks(PeepholeFPM, Level);
@@ -1887,13 +1892,13 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
     assert(Matches.size() == 3 && "Must capture two matched strings!");
 
     OptimizationLevel L = StringSwitch<OptimizationLevel>(Matches[2])
-                              .Case("O0", O0)
-                              .Case("O1", O1)
-                              .Case("O2", O2)
-                              .Case("O3", O3)
-                              .Case("Os", Os)
-                              .Case("Oz", Oz);
-    if (L == O0) {
+                              .Case("O0", OptimizationLevel::O0)
+                              .Case("O1", OptimizationLevel::O1)
+                              .Case("O2", OptimizationLevel::O2)
+                              .Case("O3", OptimizationLevel::O3)
+                              .Case("Os", OptimizationLevel::Os)
+                              .Case("Oz", OptimizationLevel::Oz);
+    if (L == OptimizationLevel::O0) {
       // Add instrumentation PGO passes -- at O0 we can still do PGO.
       if (PGOOpt && Matches[1] != "thinlto" &&
           (PGOOpt->Action == PGOOptions::IRInstr ||
@@ -1910,8 +1915,10 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
     // This is consistent with old pass manager invoked via opt, but
     // inconsistent with clang. Clang doesn't enable loop vectorization
     // but does enable slp vectorization at Oz.
-    PTO.LoopVectorization = L > O1 && L < Oz;
-    PTO.SLPVectorization = L > O1 && L < Oz;
+    PTO.LoopVectorization =
+        L.getSpeedupLevel() > 1 && L != OptimizationLevel::Oz;
+    PTO.SLPVectorization =
+        L.getSpeedupLevel() > 1 && L != OptimizationLevel::Oz;
 
     if (Matches[1] == "default") {
       MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging));

diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index bb0459bfafc6..a3994eab38a8 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -171,6 +171,16 @@ static cl::opt<bool> UnrollRevisitChildLoops(
              "This shouldn't typically be needed as child loops (or their "
              "clones) were already visited."));
 
+static cl::opt<unsigned> UnrollThresholdAggressive(
+    "unroll-threshold-aggressive", cl::init(300), cl::Hidden,
+    cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) "
+             "optimizations"));
+static cl::opt<unsigned>
+    UnrollThresholdDefault("unroll-threshold-default", cl::init(150),
+                           cl::Hidden,
+                           cl::desc("Default threshold (max size of unrolled "
+                                    "loop), used in all but O3 optimizations"));
+
 /// A magic value for use with the Threshold parameter to indicate
 /// that the loop unroll should be performed regardless of how much
 /// code expansion would result.
@@ -189,7 +199,8 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
   TargetTransformInfo::UnrollingPreferences UP;
 
   // Set up the defaults
-  UP.Threshold = OptLevel > 2 ? 300 : 150;
+  UP.Threshold =
+      OptLevel > 2 ? UnrollThresholdAggressive : UnrollThresholdDefault;
   UP.MaxPercentThresholdBoost = 400;
   UP.OptSizeThreshold = 0;
   UP.PartialThreshold = 150;

diff --git a/llvm/test/Transforms/LoopUnroll/opt-levels.ll b/llvm/test/Transforms/LoopUnroll/opt-levels.ll
new file mode 100644
index 000000000000..ed0abc7672e0
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/opt-levels.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -S -passes="default<O2>" -unroll-runtime=true -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=O2
+; RUN: opt < %s -S -passes="default<O3>" -unroll-runtime=true -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=O3
+; RUN: opt < %s -S -passes="default<Os>" -unroll-runtime=true -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=Os
+; RUN: opt < %s -S -passes="default<Oz>" -unroll-runtime=true -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=Oz
+
+; Check that Os and Oz are optimized like O2, not like O3. To easily highlight
+; the behavior, we artificially disable unrolling for anything but O3 by setting
+; the default threshold to 0.
+
+; O3:     loop2.preheader
+; O2-NOT: loop2.preheader
+; Os-NOT: loop2.preheader
+; Oz-NOT: loop2.preheader
+
+define void @unroll(i32 %iter, i32* %addr1, i32* %addr2) nounwind {
+entry:
+  br label %loop1
+
+loop1:
+  %iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ]
+  %offset1 = getelementptr i32, i32* %addr1, i32 %iv1
+  store i32 %iv1, i32* %offset1, align 4
+  br label %loop2.header
+
+loop2.header:
+  %e = icmp uge i32 %iter, 1
+  br i1 %e, label %loop2, label %exit2
+
+loop2:
+  %iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ]
+  %offset2 = getelementptr i32, i32* %addr2, i32 %iv2
+  store i32 %iv2, i32* %offset2, align 4
+  %inc2 = add i32 %iv2, 1
+  %exitcnd2 = icmp uge i32 %inc2, %iter
+  br i1 %exitcnd2, label %exit2, label %loop2
+
+exit2:
+  br label %loop1.latch
+
+loop1.latch:
+  %inc1 = add i32 %iv1, 1
+  %exitcnd1 = icmp uge i32 %inc1, 1024
+  br i1 %exitcnd1, label %exit, label %loop1
+
+exit:
+  ret void
+}

diff --git a/llvm/test/Transforms/LoopUnrollAndJam/opt-levels.ll b/llvm/test/Transforms/LoopUnrollAndJam/opt-levels.ll
new file mode 100644
index 000000000000..ef6d2c7502fb
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnrollAndJam/opt-levels.ll
@@ -0,0 +1,61 @@
+; RUN: opt < %s -S -passes="default<O2>" -unroll-runtime=true -enable-npm-unroll-and-jam -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=O2
+; RUN: opt < %s -S -passes="default<O3>" -unroll-runtime=true -enable-npm-unroll-and-jam -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=O3
+; RUN: opt < %s -S -passes="default<Os>" -unroll-runtime=true -enable-npm-unroll-and-jam -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=Os
+; RUN: opt < %s -S -passes="default<Oz>" -unroll-runtime=true -enable-npm-unroll-and-jam -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=Oz
+
+; Check that Os and Oz are optimized like O2, not like O3. To easily highlight
+; the behavior, we artificially disable unrolling for anything but O3 by setting
+; the default threshold to 0.
+
+; O3:     for.inner.1
+; O2-NOT: for.inner.1
+; Os-NOT: for.inner.1
+; Oz-NOT: for.inner.1
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+define void @test1(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmpJ = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmpJ
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ]
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+  %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !5
+  %add = add i32 %0, %sum
+  %inc = add nuw i32 %j, 1
+  %exitcond = icmp eq i32 %inc, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.lcssa = phi i32 [ %add, %for.inner ]
+  %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 %add.lcssa, i32* %arrayidx6, align 4, !tbaa !5
+  %add8 = add nuw i32 %i, 1
+  %exitcond25 = icmp eq i32 %add8, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+
+!5 = !{!6, !6, i64 0}
+!6 = !{!"int", !7, i64 0}
+!7 = !{!"omnipotent char", !8, i64 0}
+!8 = !{!"Simple C/C++ TBAA"}


        


More information about the llvm-commits mailing list