[llvm] fb45f3c - [SimpleLoopUnswitch] Skip non-trivial unswitching of cold functions

Ruobing Han via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 6 16:13:39 PDT 2022


Author: Ruobing Han
Date: 2022-09-06T19:13:31-04:00
New Revision: fb45f3c9486f5d9e3003db95386432562b23577c

URL: https://github.com/llvm/llvm-project/commit/fb45f3c9486f5d9e3003db95386432562b23577c
DIFF: https://github.com/llvm/llvm-project/commit/fb45f3c9486f5d9e3003db95386432562b23577c.diff

LOG: [SimpleLoopUnswitch] Skip non-trivial unswitching of cold functions

On the current main branch, non-trivial unswitching is skipped for all cold loops. As reported in D129599, skipping these cold loops causes a regression in the SPEC benchmarks.
Therefore, instead of skipping every cold loop, this change skips only loops that are in cold functions.

Reviewed By: alexgatea, aeubanks

Differential Revision: https://reviews.llvm.org/D133275

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
    llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll
    llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch2.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 70f97d807cb54..e1d5bb5d6c8c0 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -3086,7 +3086,7 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
   // Skip cold loops, as unswitching them brings little benefit
   // but increases the code size
   if (PSI && PSI->hasProfileSummary() && BFI &&
-      PSI->isColdBlock(L.getHeader(), BFI)) {
+      PSI->isFunctionColdInCallGraph(L.getHeader()->getParent(), *BFI)) {
     LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
     return false;
   }

diff  --git a/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll
index c442f04798978..eeb5014cb47f9 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll
@@ -6,89 +6,27 @@
 
 declare i32 @a()
 declare i32 @b()
-
+; Check loops in cold functions will not be applied non-trivial loop unswitch
 define void @f1(i32 %i, i1 %cond, i1 %hot_cond, i1 %cold_cond, i1* %ptr) !prof !0 {
 ; CHECK-LABEL: @f1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[ENTRY_HOT_LOOP:%.*]]
-; CHECK:       entry_hot_loop:
-; CHECK-NEXT:    br i1 [[HOT_COND:%.*]], label [[HOT_LOOP_BEGIN_PREHEADER:%.*]], label [[HOT_LOOP_EXIT:%.*]], !prof [[PROF15:![0-9]+]]
-; CHECK:       hot_loop_begin.preheader:
-; CHECK-NEXT:    br i1 [[COND:%.*]], label [[HOT_LOOP_BEGIN_PREHEADER_SPLIT_US:%.*]], label [[HOT_LOOP_BEGIN_PREHEADER_SPLIT:%.*]]
-; CHECK:       hot_loop_begin.preheader.split.us:
-; CHECK-NEXT:    br label [[HOT_LOOP_BEGIN_US:%.*]]
-; CHECK:       hot_loop_begin.us:
-; CHECK-NEXT:    br label [[HOT_LOOP_A_US:%.*]]
-; CHECK:       hot_loop_a.us:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @a()
-; CHECK-NEXT:    br label [[HOT_LOOP_LATCH_US:%.*]]
-; CHECK:       hot_loop_latch.us:
-; CHECK-NEXT:    [[V1_US:%.*]] = load i1, i1* [[PTR:%.*]], align 1
-; CHECK-NEXT:    br i1 [[V1_US]], label [[HOT_LOOP_BEGIN_US]], label [[HOT_LOOP_EXIT_LOOPEXIT_SPLIT_US:%.*]]
-; CHECK:       hot_loop_exit.loopexit.split.us:
-; CHECK-NEXT:    br label [[HOT_LOOP_EXIT_LOOPEXIT:%.*]]
-; CHECK:       hot_loop_begin.preheader.split:
-; CHECK-NEXT:    br label [[HOT_LOOP_BEGIN:%.*]]
-; CHECK:       hot_loop_begin:
-; CHECK-NEXT:    br label [[HOT_LOOP_B:%.*]]
-; CHECK:       hot_loop_b:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @b()
-; CHECK-NEXT:    br label [[HOT_LOOP_LATCH:%.*]]
-; CHECK:       hot_loop_latch:
-; CHECK-NEXT:    [[V1:%.*]] = load i1, i1* [[PTR]], align 1
-; CHECK-NEXT:    br i1 [[V1]], label [[HOT_LOOP_BEGIN]], label [[HOT_LOOP_EXIT_LOOPEXIT_SPLIT:%.*]]
-; CHECK:       hot_loop_exit.loopexit.split:
-; CHECK-NEXT:    br label [[HOT_LOOP_EXIT_LOOPEXIT]]
-; CHECK:       hot_loop_exit.loopexit:
-; CHECK-NEXT:    br label [[HOT_LOOP_EXIT]]
-; CHECK:       hot_loop_exit:
-; CHECK-NEXT:    br label [[ENTRY_COLD_LOOP:%.*]]
-; CHECK:       entry_cold_loop:
-; CHECK-NEXT:    br i1 [[COLD_COND:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER:%.*]], label [[COLD_LOOP_EXIT:%.*]], !prof [[PROF16:![0-9]+]]
-; CHECK:       cold_loop_begin.preheader:
 ; CHECK-NEXT:    br label [[COLD_LOOP_BEGIN:%.*]]
 ; CHECK:       cold_loop_begin:
-; CHECK-NEXT:    br i1 [[COND]], label [[COLD_LOOP_A:%.*]], label [[COLD_LOOP_B:%.*]]
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[COLD_LOOP_A:%.*]], label [[COLD_LOOP_B:%.*]]
 ; CHECK:       cold_loop_a:
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @a()
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @a()
 ; CHECK-NEXT:    br label [[COLD_LOOP_LATCH:%.*]]
 ; CHECK:       cold_loop_b:
-; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @b()
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @b()
 ; CHECK-NEXT:    br label [[COLD_LOOP_LATCH]]
 ; CHECK:       cold_loop_latch:
-; CHECK-NEXT:    [[V2:%.*]] = load i1, i1* [[PTR]], align 1
-; CHECK-NEXT:    br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT_LOOPEXIT:%.*]]
-; CHECK:       cold_loop_exit.loopexit:
-; CHECK-NEXT:    br label [[COLD_LOOP_EXIT]]
+; CHECK-NEXT:    [[V2:%.*]] = load i1, i1* [[PTR:%.*]], align 1
+; CHECK-NEXT:    br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT:%.*]]
 ; CHECK:       cold_loop_exit:
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  br label %entry_hot_loop
-
-entry_hot_loop:
-  br i1 %hot_cond, label %hot_loop_begin, label %hot_loop_exit, !prof !15
-
-hot_loop_begin:
-  br i1 %cond, label %hot_loop_a, label %hot_loop_b
-
-hot_loop_a:
-  call i32 @a()
-  br label %hot_loop_latch
-
-hot_loop_b:
-  call i32 @b()
-  br label %hot_loop_latch
-
-hot_loop_latch:
-  %v1 = load i1, i1* %ptr
-  br i1 %v1, label %hot_loop_begin, label %hot_loop_exit
-
-hot_loop_exit:
-  br label %entry_cold_loop
-
-entry_cold_loop:
-  br i1 %cold_cond, label %cold_loop_begin, label %cold_loop_exit, !prof !16
+  br label %cold_loop_begin
 
 cold_loop_begin:
   br i1 %cond, label %cold_loop_a, label %cold_loop_b
@@ -110,7 +48,7 @@ cold_loop_exit:
 }
 
 !llvm.module.flags = !{!1}
-!0 = !{!"function_entry_count", i64 400}
+!0 = !{!"function_entry_count", i64 0}
 !1 = !{i32 1, !"ProfileSummary", !2}
 !2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
 !3 = !{!"ProfileFormat", !"InstrProf"}
@@ -125,5 +63,3 @@ cold_loop_exit:
 !12 = !{i32 10000, i64 100, i32 1}
 !13 = !{i32 999000, i64 100, i32 1}
 !14 = !{i32 999999, i64 1, i32 2}
-!15 = !{!"branch_weights", i32 100, i32 0}
-!16 = !{!"branch_weights", i32 0, i32 100}

diff  --git a/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch2.ll b/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch2.ll
index cc3df2faaa904..452b4d876d937 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch2.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch2.ll
@@ -8,25 +8,38 @@ declare i32 @b()
 ; Check loops will be applied non-trivial loop unswitch in a non-cold function,
 ; even loop headers are cold
 
-define void @f1(i32 %i, i1 %cond, i1 %hot_cond, i1 %cold_cond, i1* %ptr) !prof !0 {
+define void @f1(i32 %i, i1 %cond, i1 %hot_cond, i1 %cold_cond, i1* %ptr) !prof !14 {
 ; CHECK-LABEL: @f1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[ENTRY_COLD_LOOP:%.*]]
 ; CHECK:       entry_cold_loop:
 ; CHECK-NEXT:    br i1 [[COLD_COND:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER:%.*]], label [[COLD_LOOP_EXIT:%.*]], !prof [[PROF15:![0-9]+]]
 ; CHECK:       cold_loop_begin.preheader:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER_SPLIT_US:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER_SPLIT:%.*]]
+; CHECK:       cold_loop_begin.preheader.split.us:
+; CHECK-NEXT:    br label [[COLD_LOOP_BEGIN_US:%.*]]
+; CHECK:       cold_loop_begin.us:
+; CHECK-NEXT:    br label [[COLD_LOOP_A_US:%.*]]
+; CHECK:       cold_loop_a.us:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @a()
+; CHECK-NEXT:    br label [[COLD_LOOP_LATCH_US:%.*]]
+; CHECK:       cold_loop_latch.us:
+; CHECK-NEXT:    [[V2_US:%.*]] = load i1, i1* [[PTR:%.*]], align 1
+; CHECK-NEXT:    br i1 [[V2_US]], label [[COLD_LOOP_BEGIN_US]], label [[COLD_LOOP_EXIT_LOOPEXIT_SPLIT_US:%.*]]
+; CHECK:       cold_loop_exit.loopexit.split.us:
+; CHECK-NEXT:    br label [[COLD_LOOP_EXIT_LOOPEXIT:%.*]]
+; CHECK:       cold_loop_begin.preheader.split:
 ; CHECK-NEXT:    br label [[COLD_LOOP_BEGIN:%.*]]
 ; CHECK:       cold_loop_begin:
-; CHECK-NEXT:    br i1 [[COND:%.*]], label [[COLD_LOOP_A:%.*]], label [[COLD_LOOP_B:%.*]]
-; CHECK:       cold_loop_a:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @a()
-; CHECK-NEXT:    br label [[COLD_LOOP_LATCH:%.*]]
+; CHECK-NEXT:    br label [[COLD_LOOP_B:%.*]]
 ; CHECK:       cold_loop_b:
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @b()
-; CHECK-NEXT:    br label [[COLD_LOOP_LATCH]]
+; CHECK-NEXT:    br label [[COLD_LOOP_LATCH:%.*]]
 ; CHECK:       cold_loop_latch:
-; CHECK-NEXT:    [[V2:%.*]] = load i1, i1* [[PTR:%.*]], align 1
-; CHECK-NEXT:    br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT_LOOPEXIT:%.*]]
+; CHECK-NEXT:    [[V2:%.*]] = load i1, i1* [[PTR]], align 1
+; CHECK-NEXT:    br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT_LOOPEXIT_SPLIT:%.*]]
+; CHECK:       cold_loop_exit.loopexit.split:
+; CHECK-NEXT:    br label [[COLD_LOOP_EXIT_LOOPEXIT]]
 ; CHECK:       cold_loop_exit.loopexit:
 ; CHECK-NEXT:    br label [[COLD_LOOP_EXIT]]
 ; CHECK:       cold_loop_exit:
@@ -36,17 +49,17 @@ entry:
   br label %entry_cold_loop
 
 entry_cold_loop:
-  br i1 %cold_cond, label %cold_loop_begin, label %cold_loop_exit, !prof !16
+  br i1 %cold_cond, label %cold_loop_begin, label %cold_loop_exit, !prof !15
 
 cold_loop_begin:
   br i1 %cond, label %cold_loop_a, label %cold_loop_b
 
 cold_loop_a:
-  call i32 @a()
+  %0 = call i32 @a()
   br label %cold_loop_latch
 
 cold_loop_b:
-  call i32 @b()
+  %1 = call i32 @b()
   br label %cold_loop_latch
 
 cold_loop_latch:
@@ -57,21 +70,21 @@ cold_loop_exit:
   ret void
 }
 
-!llvm.module.flags = !{!1}
-!0 = !{!"function_entry_count", i64 400}
-!1 = !{i32 1, !"ProfileSummary", !2}
-!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
-!3 = !{!"ProfileFormat", !"InstrProf"}
-!4 = !{!"TotalCount", i64 10000}
-!5 = !{!"MaxCount", i64 10}
-!6 = !{!"MaxInternalCount", i64 1}
-!7 = !{!"MaxFunctionCount", i64 1000}
-!8 = !{!"NumCounts", i64 3}
-!9 = !{!"NumFunctions", i64 3}
-!10 = !{!"DetailedSummary", !11}
-!11 = !{!12, !13, !14}
-!12 = !{i32 10000, i64 100, i32 1}
-!13 = !{i32 999000, i64 100, i32 1}
-!14 = !{i32 999999, i64 1, i32 2}
-!15 = !{!"branch_weights", i32 100, i32 0}
-!16 = !{!"branch_weights", i32 0, i32 100}
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 400}
+!15 = !{!"branch_weights", i32 0, i32 100}


        


More information about the llvm-commits mailing list