[llvm] 4274cbb - [PartialInliner]: Handle code regions in switch statement cases

Ettore Tiotto via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 2 11:33:09 PST 2020


Author: Ettore Tiotto
Date: 2020-11-02T14:32:45-05:00
New Revision: 4274cbba1c69a67020aa2ecebd4a4065425daf8a

URL: https://github.com/llvm/llvm-project/commit/4274cbba1c69a67020aa2ecebd4a4065425daf8a
DIFF: https://github.com/llvm/llvm-project/commit/4274cbba1c69a67020aa2ecebd4a4065425daf8a.diff

LOG: [PartialInliner]: Handle code regions in switch statement cases

This patch enhances computeOutliningColdRegionsInfo() so that it also
considers regions consisting of a single basic block whose entry has a
single predecessor (for example, the case blocks of a switch statement)
as candidates for partial inlining.

Reviewed By: fhann

Differential Revision: https://reviews.llvm.org/D89911

Added: 
    llvm/test/Transforms/PartialInlining/switch_stmt.ll

Modified: 
    llvm/lib/Transforms/IPO/PartialInlining.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 2827abe7a734..2bbf4bf110ae 100644
--- a/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -414,11 +414,6 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(
   std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
       std::make_unique<FunctionOutliningMultiRegionInfo>();
 
-  auto IsSingleEntry = [](SmallVectorImpl<BasicBlock *> &BlockList) {
-    BasicBlock *Dom = BlockList.front();
-    return BlockList.size() > 1 && Dom->hasNPredecessors(1);
-  };
-
   auto IsSingleExit =
       [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
     BasicBlock *ExitBlock = nullptr;
@@ -502,15 +497,24 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(
 
       SmallVector<BasicBlock *, 8> DominateVector;
       DT.getDescendants(*SI, DominateVector);
+      assert(!DominateVector.empty() &&
+             "SI should be reachable and have at least itself as descendant");
 
       // We can only outline single entry regions (for now).
-      if (!IsSingleEntry(DominateVector))
+      if (!DominateVector.front()->hasNPredecessors(1)) {
+        LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
+                          << " doesn't have a single predecessor in the "
+                             "dominator tree\n";);
         continue;
+      }
 
       BasicBlock *ExitBlock = nullptr;
       // We can only outline single exit regions (for now).
-      if (!(ExitBlock = IsSingleExit(DominateVector)))
+      if (!(ExitBlock = IsSingleExit(DominateVector))) {
+        LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
+                          << " doesn't have a unique successor\n";);
         continue;
+      }
 
       int OutlineRegionCost = 0;
       for (auto *BB : DominateVector)
@@ -519,7 +523,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(
       LLVM_DEBUG(dbgs() << "OutlineRegionCost = " << OutlineRegionCost
                         << "\n";);
 
-      if (OutlineRegionCost < MinOutlineRegionCost) {
+      if (!SkipCostAnalysis && OutlineRegionCost < MinOutlineRegionCost) {
         ORE.emit([&]() {
           return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
                                             &SI->front())
@@ -527,8 +531,12 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(
                  << " inline cost-savings smaller than "
                  << ore::NV("Cost", MinOutlineRegionCost);
         });
+
+        LLVM_DEBUG(dbgs() << "ABORT: Outline region cost is smaller than "
+                          << MinOutlineRegionCost << "\n";);
         continue;
       }
+
       // For now, ignore blocks that belong to a SISE region that is a
       // candidate for outlining.  In the future, we may want to look
       // at inner regions because the outer region may have live-exit

diff  --git a/llvm/test/Transforms/PartialInlining/switch_stmt.ll b/llvm/test/Transforms/PartialInlining/switch_stmt.ll
new file mode 100644
index 000000000000..e5587375236d
--- /dev/null
+++ b/llvm/test/Transforms/PartialInlining/switch_stmt.ll
@@ -0,0 +1,128 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes="partial-inliner" -skip-partial-inlining-cost-analysis -S < %s | FileCheck %s
+; RUN: opt -partial-inliner -skip-partial-inlining-cost-analysis -S < %s | FileCheck %s
+
+define dso_local signext i32 @callee(i32 signext %c1, i32 signext %c2) !prof !30 {
+; CHECK-LABEL: @callee(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 0, i32* [[RC]], align 4
+; CHECK-NEXT:    switch i32 [[C1:%.*]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT:    i32 0, label [[SW_BB:%.*]]
+; CHECK-NEXT:    i32 1, label [[SW_BB1:%.*]]
+; CHECK-NEXT:    i32 2, label [[SW_BB2:%.*]]
+; CHECK-NEXT:    ], !prof !31
+; CHECK:       sw.bb:
+; CHECK-NEXT:    store i32 1, i32* [[RC]], align 4
+; CHECK-NEXT:    br label [[SW_EPILOG:%.*]]
+; CHECK:       sw.bb1:
+; CHECK-NEXT:    store i32 2, i32* [[RC]], align 4
+; CHECK-NEXT:    br label [[SW_EPILOG]]
+; CHECK:       sw.bb2:
+; CHECK-NEXT:    store i32 4, i32* [[RC]], align 4
+; CHECK-NEXT:    br label [[SW_EPILOG]]
+; CHECK:       sw.default:
+; CHECK-NEXT:    store i32 [[C2:%.*]], i32* [[RC]], align 4
+; CHECK-NEXT:    br label [[SW_EPILOG]]
+; CHECK:       sw.epilog:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[RC]], align 4
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+entry:
+  %rc = alloca i32, align 4
+  store i32 0, i32* %rc, align 4
+  switch i32 %c1, label %sw.default [
+  i32 0, label %sw.bb
+  i32 1, label %sw.bb1
+  i32 2, label %sw.bb2
+  ], !prof !31
+
+sw.bb: ;; cold
+  store i32 1, i32* %rc, align 4
+  br label %sw.epilog
+
+sw.bb1:
+  store i32 2, i32* %rc, align 4
+  br label %sw.epilog
+
+sw.bb2: ;; cold
+  store i32 4, i32* %rc, align 4
+  br label %sw.epilog
+
+sw.default:
+  store i32 %c2, i32* %rc, align 4
+  br label %sw.epilog
+
+sw.epilog:
+  %0 = load i32, i32* %rc, align 4
+  ret i32 %0
+}
+
+define dso_local signext i32 @caller(i32 signext %c) !prof !30 {
+; CHECK-LABEL: @caller(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RC_I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[RC_I]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]])
+; CHECK-NEXT:    store i32 0, i32* [[RC_I]], align 4
+; CHECK-NEXT:    switch i32 [[C:%.*]], label [[SW_DEFAULT_I:%.*]] [
+; CHECK-NEXT:    i32 0, label [[CODEREPL_I:%.*]]
+; CHECK-NEXT:    i32 1, label [[SW_BB1_I:%.*]]
+; CHECK-NEXT:    i32 2, label [[CODEREPL1_I:%.*]]
+; CHECK-NEXT:    ], !prof !31
+; CHECK:       codeRepl.i:
+; CHECK-NEXT:    call void @callee.1.sw.bb(i32* [[RC_I]])
+; CHECK-NEXT:    br label [[CALLEE_1_EXIT:%.*]]
+; CHECK:       sw.bb1.i:
+; CHECK-NEXT:    store i32 2, i32* [[RC_I]], align 4
+; CHECK-NEXT:    br label [[CALLEE_1_EXIT]]
+; CHECK:       codeRepl1.i:
+; CHECK-NEXT:    call void @callee.1.sw.bb2(i32* [[RC_I]])
+; CHECK-NEXT:    br label [[CALLEE_1_EXIT]]
+; CHECK:       sw.default.i:
+; CHECK-NEXT:    store i32 [[C]], i32* [[RC_I]], align 4
+; CHECK-NEXT:    br label [[CALLEE_1_EXIT]]
+; CHECK:       callee.1.exit:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[RC_I]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[RC_I]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP2]])
+;
+entry:
+  %0 = call signext i32 @callee(i32 signext %c, i32 signext %c)
+  ret i32 %0 
+}
+
+!llvm.module.flags = !{!0, !1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 2}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1000}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 4}
+!9 = !{!"NumFunctions", i64 2}
+!10 = !{!"IsPartialProfile", i64 0}
+!11 = !{!"PartialProfileRatio", double 0.000000e+00}
+!12 = !{!"DetailedSummary", !13}
+!13 = !{!14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29}
+!14 = !{i32 10000, i64 0, i32 0}
+!15 = !{i32 100000, i64 0, i32 0}
+!16 = !{i32 200000, i64 0, i32 0}
+!17 = !{i32 300000, i64 0, i32 0}
+!18 = !{i32 400000, i64 0, i32 0}
+!19 = !{i32 500000, i64 1, i32 2}
+!20 = !{i32 600000, i64 1, i32 2}
+!21 = !{i32 700000, i64 1, i32 2}
+!22 = !{i32 800000, i64 1, i32 2}
+!23 = !{i32 900000, i64 1, i32 2}
+!24 = !{i32 950000, i64 1, i32 2}
+!25 = !{i32 990000, i64 1, i32 2}
+!26 = !{i32 999000, i64 1, i32 2}
+!27 = !{i32 999900, i64 1, i32 2}
+!28 = !{i32 999990, i64 1, i32 2}
+!29 = !{i32 999999, i64 1, i32 2}
+!30 = !{!"function_entry_count", i64 1000}
+!31 = !{!"branch_weights", i32 500, i32 10, i32 150, i32 40}


        


More information about the llvm-commits mailing list