[llvm] f756f06 - [SimpleLoopUnswitch] Skip non-trivial unswitching of cold loops

Ruobing Han via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 8 11:12:50 PDT 2022


Author: Ruobing Han
Date: 2022-08-08T18:12:04Z
New Revision: f756f06cc471b91d81d7f7f15d9df76ed24de730

URL: https://github.com/llvm/llvm-project/commit/f756f06cc471b91d81d7f7f15d9df76ed24de730
DIFF: https://github.com/llvm/llvm-project/commit/f756f06cc471b91d81d7f7f15d9df76ed24de730.diff

LOG: [SimpleLoopUnswitch] Skip non-trivial unswitching of cold loops

With profile data, non-trivial LoopUnswitch will only apply on non-cold loops, as unswitching cold loops may not gain much benefit but significantly increase the code size.

Reviewed By: aeubanks, asbirlea

Differential Revision: https://reviews.llvm.org/D129599

Added: 
    

Modified: 
    llvm/lib/Passes/PassBuilder.cpp
    llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
    llvm/test/Other/new-pm-defaults.ll
    llvm/test/Other/new-pm-thinlto-defaults.ll
    llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
    llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
    llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
    llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
    llvm/test/Transforms/LoopPredication/preserve-bpi.ll
    llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll
    llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-markloopasdeleted.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index af18479978f21..e77e5827a78f0 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1399,8 +1399,10 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
         return Err;
       // Add the nested pass manager with the appropriate adaptor.
       bool UseMemorySSA = (Name == "loop-mssa");
-      bool UseBFI = llvm::any_of(
-          InnerPipeline, [](auto Pipeline) { return Pipeline.Name == "licm"; });
+      bool UseBFI = llvm::any_of(InnerPipeline, [](auto Pipeline) {
+        return Pipeline.Name.contains("licm") ||
+               Pipeline.Name.contains("simple-loop-unswitch");
+      });
       bool UseBPI = llvm::any_of(InnerPipeline, [](auto Pipeline) {
         return Pipeline.Name == "loop-predication";
       });

diff  --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 0535608244cc2..01c0bf7786aca 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -16,6 +16,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/CodeMetrics.h"
 #include "llvm/Analysis/GuardUtils.h"
@@ -26,6 +27,7 @@
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/Analysis/MustExecute.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -3044,6 +3046,7 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
              bool NonTrivial,
              function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
              ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+             ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
              function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
   assert(L.isRecursivelyLCSSAForm(DT, LI) &&
          "Loops must be in LCSSA form before unswitching.");
@@ -3080,6 +3083,14 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
   if (L.getHeader()->getParent()->hasOptSize())
     return false;
 
+  // Skip cold loops, as unswitching them brings little benefit
+  // but increases the code size
+  if (PSI && PSI->hasProfileSummary() && BFI &&
+      PSI->isColdBlock(L.getHeader(), BFI)) {
+    LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
+    return false;
+  }
+
   // Skip non-trivial unswitching for loops that cannot be cloned.
   if (!L.isSafeToClone())
     return false;
@@ -3105,7 +3116,11 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
                                               LPMUpdater &U) {
   Function &F = *L.getHeader()->getParent();
   (void)F;
-
+  ProfileSummaryInfo *PSI = nullptr;
+  if (auto OuterProxy =
+          AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR)
+              .getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
+    PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
   LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
                     << "\n");
 
@@ -3152,7 +3167,7 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
   }
   if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
                     UnswitchCB, &AR.SE, MSSAU ? MSSAU.getPointer() : nullptr,
-                    DestroyLoopCB))
+                    PSI, AR.BFI, DestroyLoopCB))
     return PreservedAnalyses::all();
 
   if (AR.MSSA && VerifyMemorySSA)
@@ -3214,7 +3229,6 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
 
   LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L
                     << "\n");
-
   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@@ -3251,9 +3265,9 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
 
   if (VerifyMemorySSA)
     MSSA->verifyMemorySSA();
-
-  bool Changed = unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial,
-                              UnswitchCB, SE, &MSSAU, DestroyLoopCB);
+  bool Changed =
+      unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE,
+                   &MSSAU, nullptr, nullptr, DestroyLoopCB);
 
   if (VerifyMemorySSA)
     MSSA->verifyMemorySSA();

diff  --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index 78eeab985fdbc..4ca8ca799f13d 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -174,6 +174,7 @@
 ; CHECK-O-NEXT: Running pass: LoopRotatePass
 ; CHECK-O-NEXT: Running pass: LICM
 ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
+; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-O-NEXT: Running pass: LoopSimplifyPass

diff  --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll
index aa40e7d528900..a6d6cdb76c4b1 100644
--- a/llvm/test/Other/new-pm-thinlto-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-defaults.ll
@@ -137,6 +137,7 @@
 ; CHECK-O-NEXT: Running pass: LoopRotatePass
 ; CHECK-O-NEXT: Running pass: LICM
 ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
+; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-O-NEXT: Running pass: LoopSimplifyPass

diff  --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
index b31dfe97e1ca2..f301d3de38248 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -110,6 +110,7 @@
 ; CHECK-O-NEXT: Running pass: LoopRotatePass
 ; CHECK-O-NEXT: Running pass: LICM
 ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
+; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-O-NEXT: Running pass: LoopSimplifyPass

diff  --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
index d79c2c2559506..568b1fcf2b849 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -119,6 +119,7 @@
 ; CHECK-O-NEXT: Running pass: LoopRotatePass
 ; CHECK-O-NEXT: Running pass: LICM
 ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
+; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-O-NEXT: Running pass: LoopSimplifyPass

diff  --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
index c604a20ba2489..448eb0e228e59 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
@@ -148,6 +148,7 @@
 ; CHECK-O-NEXT: Running pass: LoopRotatePass
 ; CHECK-O-NEXT: Running pass: LICM
 ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
+; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-O-NEXT: Running pass: LoopSimplifyPass

diff  --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
index a8308022a0445..3ee1ba6f90591 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
@@ -114,6 +114,7 @@
 ; CHECK-O-NEXT: Running pass: LoopRotatePass
 ; CHECK-O-NEXT: Running pass: LICM
 ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
+; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-O-NEXT: Running pass: LoopSimplifyPass

diff  --git a/llvm/test/Transforms/LoopPredication/preserve-bpi.ll b/llvm/test/Transforms/LoopPredication/preserve-bpi.ll
index f9eb6c890b79a..e41ffeaffec5c 100644
--- a/llvm/test/Transforms/LoopPredication/preserve-bpi.ll
+++ b/llvm/test/Transforms/LoopPredication/preserve-bpi.ll
@@ -10,6 +10,7 @@ declare void @llvm.experimental.guard(i1, ...)
 ; CHECK: Running pass: LoopPredicationPass on Loop at depth 1
 ; CHECK-NEXT: Running pass: LICMPass on Loop at depth 1
 ; CHECK-NEXT: Running pass: SimpleLoopUnswitchPass on Loop at depth 1
+; CHECK-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-NEXT: Running pass: LoopPredicationPass on Loop at depth 1
 ; CHECK-NEXT: Running pass: LICMPass on Loop at depth 1
 ; CHECK-NEXT: Running pass: SimpleLoopUnswitchPass on Loop at depth 1

diff  --git a/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll
index 9c8fe12d06463..c442f04798978 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll
@@ -46,31 +46,18 @@ define void @f1(i32 %i, i1 %cond, i1 %hot_cond, i1 %cold_cond, i1* %ptr) !prof !
 ; CHECK:       entry_cold_loop:
 ; CHECK-NEXT:    br i1 [[COLD_COND:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER:%.*]], label [[COLD_LOOP_EXIT:%.*]], !prof [[PROF16:![0-9]+]]
 ; CHECK:       cold_loop_begin.preheader:
-; CHECK-NEXT:    br i1 [[COND]], label [[COLD_LOOP_BEGIN_PREHEADER_SPLIT_US:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER_SPLIT:%.*]]
-; CHECK:       cold_loop_begin.preheader.split.us:
-; CHECK-NEXT:    br label [[COLD_LOOP_BEGIN_US:%.*]]
-; CHECK:       cold_loop_begin.us:
-; CHECK-NEXT:    br label [[COLD_LOOP_A_US:%.*]]
-; CHECK:       cold_loop_a.us:
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @a()
-; CHECK-NEXT:    br label [[COLD_LOOP_LATCH_US:%.*]]
-; CHECK:       cold_loop_latch.us:
-; CHECK-NEXT:    [[V2_US:%.*]] = load i1, i1* [[PTR]], align 1
-; CHECK-NEXT:    br i1 [[V2_US]], label [[COLD_LOOP_BEGIN_US]], label [[COLD_LOOP_EXIT_LOOPEXIT_SPLIT_US:%.*]]
-; CHECK:       cold_loop_exit.loopexit.split.us:
-; CHECK-NEXT:    br label [[COLD_LOOP_EXIT_LOOPEXIT:%.*]]
-; CHECK:       cold_loop_begin.preheader.split:
 ; CHECK-NEXT:    br label [[COLD_LOOP_BEGIN:%.*]]
 ; CHECK:       cold_loop_begin:
-; CHECK-NEXT:    br label [[COLD_LOOP_B:%.*]]
+; CHECK-NEXT:    br i1 [[COND]], label [[COLD_LOOP_A:%.*]], label [[COLD_LOOP_B:%.*]]
+; CHECK:       cold_loop_a:
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @a()
+; CHECK-NEXT:    br label [[COLD_LOOP_LATCH:%.*]]
 ; CHECK:       cold_loop_b:
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @b()
-; CHECK-NEXT:    br label [[COLD_LOOP_LATCH:%.*]]
+; CHECK-NEXT:    br label [[COLD_LOOP_LATCH]]
 ; CHECK:       cold_loop_latch:
 ; CHECK-NEXT:    [[V2:%.*]] = load i1, i1* [[PTR]], align 1
-; CHECK-NEXT:    br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT_LOOPEXIT_SPLIT:%.*]]
-; CHECK:       cold_loop_exit.loopexit.split:
-; CHECK-NEXT:    br label [[COLD_LOOP_EXIT_LOOPEXIT]]
+; CHECK-NEXT:    br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT_LOOPEXIT:%.*]]
 ; CHECK:       cold_loop_exit.loopexit:
 ; CHECK-NEXT:    br label [[COLD_LOOP_EXIT]]
 ; CHECK:       cold_loop_exit:

diff  --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-markloopasdeleted.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-markloopasdeleted.ll
index 455a385355764..3a193da617bfe 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-markloopasdeleted.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-markloopasdeleted.ll
@@ -18,6 +18,7 @@
 ; the analysis caches.
 ;
 ; CHECK: Running pass: SimpleLoopUnswitchPass on Loop at depth 1 containing: %loop_begin<header>,%loop_b,%loop_b_inner,%loop_b_inner_exit,%loop_a,%loop_a_inner,%loop_a_inner_exit,%latch<latch><exiting>
+; CHECK-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-NEXT: Clearing all analysis results for: loop_a_inner
 
 


        


More information about the llvm-commits mailing list