[llvm] a51f4af - [HCS] Externd to outline overlapping sub/super cold regions (#80732)

via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 22 09:04:12 PST 2024


Author: Shimin Cui
Date: 2024-02-22T12:04:08-05:00
New Revision: a51f4afc5aec8145091fead1d68c81e7d210fc0d

URL: https://github.com/llvm/llvm-project/commit/a51f4afc5aec8145091fead1d68c81e7d210fc0d
DIFF: https://github.com/llvm/llvm-project/commit/a51f4afc5aec8145091fead1d68c81e7d210fc0d.diff

LOG: [HCS] Externd to outline overlapping sub/super cold regions (#80732)

Currently, with hot cold splitting, when a cold region is identified, it
is added to the region list of ColdBlocks. Then when another cold region
(B) identified overlaps with a ColdBlocks region (A) already added to
the list, the region B is not added to the list because of the
overlapping with region A. The splitting analysis is performed, and the
region A may not get split, for example, if it’s considered too
expansive. This is to improve the handling the overlapping case when the
region A is not considered good for splitting, while the region B is
good for splitting.
 
The change is to move the cold region splitting analysis earlier to
allow more cold region splitting. If an identified region cannot be
split, it will not be added to the candidate list of ColdBlocks for
overlapping check.

Added: 
    llvm/test/Transforms/HotColdSplit/outline-inner-region.ll
    llvm/test/Transforms/HotColdSplit/outline-outer-region.ll

Modified: 
    llvm/include/llvm/Transforms/IPO/HotColdSplitting.h
    llvm/lib/Transforms/IPO/HotColdSplitting.cpp
    llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll
    llvm/test/Transforms/HotColdSplit/eh-pads.ll
    llvm/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h b/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h
index c87c6453500c57..13dda6d61284c9 100644
--- a/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h
+++ b/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h
@@ -24,6 +24,7 @@ class TargetTransformInfo;
 class OptimizationRemarkEmitter;
 class AssumptionCache;
 class DominatorTree;
+class CodeExtractor;
 class CodeExtractorAnalysisCache;
 
 /// A sequence of basic blocks.
@@ -43,19 +44,17 @@ class HotColdSplitting {
 
 private:
   bool isFunctionCold(const Function &F) const;
-  bool isBasicBlockCold(BasicBlock* BB,
-                        BranchProbability ColdProbThresh,
-                        SmallPtrSetImpl<BasicBlock *> &ColdBlocks,
+  bool isBasicBlockCold(BasicBlock *BB, BranchProbability ColdProbThresh,
                         SmallPtrSetImpl<BasicBlock *> &AnnotatedColdBlocks,
                         BlockFrequencyInfo *BFI) const;
   bool shouldOutlineFrom(const Function &F) const;
   bool outlineColdRegions(Function &F, bool HasProfileSummary);
-  Function *extractColdRegion(const BlockSequence &Region,
+  bool isSplittingBeneficial(CodeExtractor &CE, const BlockSequence &Region,
+                             TargetTransformInfo &TTI);
+  Function *extractColdRegion(BasicBlock &EntryPoint, CodeExtractor &CE,
                               const CodeExtractorAnalysisCache &CEAC,
-                              DominatorTree &DT, BlockFrequencyInfo *BFI,
-                              TargetTransformInfo &TTI,
-                              OptimizationRemarkEmitter &ORE,
-                              AssumptionCache *AC, unsigned Count);
+                              BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
+                              OptimizationRemarkEmitter &ORE);
   ProfileSummaryInfo *PSI;
   function_ref<BlockFrequencyInfo *(Function &)> GetBFI;
   function_ref<TargetTransformInfo &(Function &)> GetTTI;

diff  --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
index fabb3c5fb921df..5f03bd59b8cd1f 100644
--- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -215,15 +215,10 @@ bool HotColdSplitting::isFunctionCold(const Function &F) const {
   return false;
 }
 
-bool HotColdSplitting::isBasicBlockCold(BasicBlock *BB,
-                          BranchProbability ColdProbThresh,
-                          SmallPtrSetImpl<BasicBlock *> &ColdBlocks,
-                          SmallPtrSetImpl<BasicBlock *> &AnnotatedColdBlocks,
-                          BlockFrequencyInfo *BFI) const {
-  // This block is already part of some outlining region.
-  if (ColdBlocks.count(BB))
-    return true;
-
+bool HotColdSplitting::isBasicBlockCold(
+    BasicBlock *BB, BranchProbability ColdProbThresh,
+    SmallPtrSetImpl<BasicBlock *> &AnnotatedColdBlocks,
+    BlockFrequencyInfo *BFI) const {
   if (BFI) {
     if (PSI->isColdBlock(BB, BFI))
       return true;
@@ -372,18 +367,12 @@ static int getOutliningPenalty(ArrayRef<BasicBlock *> Region,
   return Penalty;
 }
 
-Function *HotColdSplitting::extractColdRegion(
-    const BlockSequence &Region, const CodeExtractorAnalysisCache &CEAC,
-    DominatorTree &DT, BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
-    OptimizationRemarkEmitter &ORE, AssumptionCache *AC, unsigned Count) {
+// Determine if it is beneficial to split the \p Region.
+bool HotColdSplitting::isSplittingBeneficial(CodeExtractor &CE,
+                                             const BlockSequence &Region,
+                                             TargetTransformInfo &TTI) {
   assert(!Region.empty());
 
-  // TODO: Pass BFI and BPI to update profile information.
-  CodeExtractor CE(Region, &DT, /* AggregateArgs */ false, /* BFI */ nullptr,
-                   /* BPI */ nullptr, AC, /* AllowVarArgs */ false,
-                   /* AllowAlloca */ false, /* AllocaBlock */ nullptr,
-                   /* Suffix */ "cold." + std::to_string(Count));
-
   // Perform a simple cost/benefit analysis to decide whether or not to permit
   // splitting.
   SetVector<Value *> Inputs, Outputs, Sinks;
@@ -394,9 +383,18 @@ Function *HotColdSplitting::extractColdRegion(
   LLVM_DEBUG(dbgs() << "Split profitability: benefit = " << OutliningBenefit
                     << ", penalty = " << OutliningPenalty << "\n");
   if (!OutliningBenefit.isValid() || OutliningBenefit <= OutliningPenalty)
-    return nullptr;
+    return false;
+
+  return true;
+}
 
-  Function *OrigF = Region[0]->getParent();
+// Split the single \p EntryPoint cold region. \p CE is the region code
+// extractor.
+Function *HotColdSplitting::extractColdRegion(
+    BasicBlock &EntryPoint, CodeExtractor &CE,
+    const CodeExtractorAnalysisCache &CEAC, BlockFrequencyInfo *BFI,
+    TargetTransformInfo &TTI, OptimizationRemarkEmitter &ORE) {
+  Function *OrigF = EntryPoint.getParent();
   if (Function *OutF = CE.extractCodeRegion(CEAC)) {
     User *U = *OutF->user_begin();
     CallInst *CI = cast<CallInst>(U);
@@ -419,7 +417,7 @@ Function *HotColdSplitting::extractColdRegion(
     LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF);
     ORE.emit([&]() {
       return OptimizationRemark(DEBUG_TYPE, "HotColdSplit",
-                                &*Region[0]->begin())
+                                &*EntryPoint.begin())
              << ore::NV("Original", OrigF) << " split cold code into "
              << ore::NV("Split", OutF);
     });
@@ -428,9 +426,9 @@ Function *HotColdSplitting::extractColdRegion(
 
   ORE.emit([&]() {
     return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
-                                    &*Region[0]->begin())
+                                    &*EntryPoint.begin())
            << "Failed to extract region at block "
-           << ore::NV("Block", Region.front());
+           << ore::NV("Block", &EntryPoint);
   });
   return nullptr;
 }
@@ -620,16 +618,18 @@ class OutliningRegion {
 } // namespace
 
 bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
-  bool Changed = false;
-
-  // The set of cold blocks.
+  // The set of cold blocks outlined.
   SmallPtrSet<BasicBlock *, 4> ColdBlocks;
 
+  // The set of cold blocks cannot be outlined.
+  SmallPtrSet<BasicBlock *, 4> CannotBeOutlinedColdBlocks;
+
   // Set of cold blocks obtained with RPOT.
   SmallPtrSet<BasicBlock *, 4> AnnotatedColdBlocks;
 
-  // The worklist of non-intersecting regions left to outline.
-  SmallVector<OutliningRegion, 2> OutliningWorklist;
+  // The worklist of non-intersecting regions left to outline. The first member
+  // of the pair is the entry point into the region to be outlined.
+  SmallVector<std::pair<BasicBlock *, CodeExtractor>, 2> OutliningWorklist;
 
   // Set up an RPO traversal. Experimentally, this performs better (outlines
   // more) than a PO traversal, because we prevent region overlap by keeping
@@ -655,10 +655,18 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
   if (ColdBranchProbDenom.getNumOccurrences())
     ColdProbThresh = BranchProbability(1, ColdBranchProbDenom.getValue());
 
+  unsigned OutlinedFunctionID = 1;
   // Find all cold regions.
   for (BasicBlock *BB : RPOT) {
-    if (!isBasicBlockCold(BB, ColdProbThresh, ColdBlocks, AnnotatedColdBlocks,
-                          BFI))
+    // This block is already part of some outlining region.
+    if (ColdBlocks.count(BB))
+      continue;
+
+    // This block is already part of some region cannot be outlined.
+    if (CannotBeOutlinedColdBlocks.count(BB))
+      continue;
+
+    if (!isBasicBlockCold(BB, ColdProbThresh, AnnotatedColdBlocks, BFI))
       continue;
 
     LLVM_DEBUG({
@@ -681,50 +689,68 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
         return markFunctionCold(F);
       }
 
-      // If this outlining region intersects with another, drop the new region.
-      //
-      // TODO: It's theoretically possible to outline more by only keeping the
-      // largest region which contains a block, but the extra bookkeeping to do
-      // this is tricky/expensive.
-      bool RegionsOverlap = any_of(Region.blocks(), [&](const BlockTy &Block) {
-        return !ColdBlocks.insert(Block.first).second;
-      });
-      if (RegionsOverlap)
-        continue;
+      do {
+        BlockSequence SubRegion = Region.takeSingleEntrySubRegion(*DT);
+        LLVM_DEBUG({
+          dbgs() << "Hot/cold splitting attempting to outline these blocks:\n";
+          for (BasicBlock *BB : SubRegion)
+            BB->dump();
+        });
+
+        // TODO: Pass BFI and BPI to update profile information.
+        CodeExtractor CE(
+            SubRegion, &*DT, /* AggregateArgs */ false, /* BFI */ nullptr,
+            /* BPI */ nullptr, AC, /* AllowVarArgs */ false,
+            /* AllowAlloca */ false, /* AllocaBlock */ nullptr,
+            /* Suffix */ "cold." + std::to_string(OutlinedFunctionID));
+
+        if (CE.isEligible() && isSplittingBeneficial(CE, SubRegion, TTI) &&
+            // If this outlining region intersects with another, drop the new
+            // region.
+            //
+            // TODO: It's theoretically possible to outline more by only keeping
+            // the largest region which contains a block, but the extra
+            // bookkeeping to do this is tricky/expensive.
+            none_of(SubRegion, [&](BasicBlock *Block) {
+              return ColdBlocks.contains(Block);
+            })) {
+          ColdBlocks.insert(SubRegion.begin(), SubRegion.end());
+
+          for (auto *Block : SubRegion) {
+            LLVM_DEBUG(dbgs()
+                       << "  contains cold block:" << Block->getName() << "\n");
+          }
+
+          OutliningWorklist.emplace_back(
+              std::make_pair(SubRegion[0], std::move(CE)));
+          ++OutlinedFunctionID;
+        } else {
+          // The cold block region cannot be outlined.
+          for (auto *Block : SubRegion)
+            if ((DT->dominates(BB, Block) && PDT->dominates(Block, BB)) ||
+                (PDT->dominates(BB, Block) && DT->dominates(Block, BB)))
+              // Will skip this cold block in the loop to save the compile time
+              CannotBeOutlinedColdBlocks.insert(Block);
+        }
+      } while (!Region.empty());
 
-      OutliningWorklist.emplace_back(std::move(Region));
       ++NumColdRegionsFound;
     }
   }
 
   if (OutliningWorklist.empty())
-    return Changed;
+    return false;
 
   // Outline single-entry cold regions, splitting up larger regions as needed.
-  unsigned OutlinedFunctionID = 1;
   // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
   CodeExtractorAnalysisCache CEAC(F);
-  do {
-    OutliningRegion Region = OutliningWorklist.pop_back_val();
-    assert(!Region.empty() && "Empty outlining region in worklist");
-    do {
-      BlockSequence SubRegion = Region.takeSingleEntrySubRegion(*DT);
-      LLVM_DEBUG({
-        dbgs() << "Hot/cold splitting attempting to outline these blocks:\n";
-        for (BasicBlock *BB : SubRegion)
-          BB->dump();
-      });
-
-      Function *Outlined = extractColdRegion(SubRegion, CEAC, *DT, BFI, TTI,
-                                             ORE, AC, OutlinedFunctionID);
-      if (Outlined) {
-        ++OutlinedFunctionID;
-        Changed = true;
-      }
-    } while (!Region.empty());
-  } while (!OutliningWorklist.empty());
+  for (auto &BCE : OutliningWorklist) {
+    Function *Outlined =
+        extractColdRegion(*BCE.first, BCE.second, CEAC, BFI, TTI, ORE);
+    assert(Outlined && "Should be outlined");
+  }
 
-  return Changed;
+  return true;
 }
 
 bool HotColdSplitting::run(Module &M) {

diff  --git a/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll b/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll
index 2154fb5cb5bc1e..8bc71148352d2c 100644
--- a/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll
+++ b/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll
@@ -13,13 +13,13 @@ target triple = "aarch64"
 ; CHECK-NOT: @llvm.assume
 ; CHECK: }
 ; CHECK: declare {{.*}}@llvm.assume
-; CHECK: define {{.*}}@f.cold.1()
-; CHECK-LABEL: newFuncRoot:
-; CHECK: }
-; CHECK: define {{.*}}@f.cold.2(i64 %load1)
+; CHECK: define {{.*}}@f.cold.1(i64 %load1)
 ; CHECK-LABEL: newFuncRoot:
 ; CHECK: %cmp1 = icmp eq i64 %load1, 0
 ; CHECK-NOT: call void @llvm.assume
+; CHECK: define {{.*}}@f.cold.2()
+; CHECK-LABEL: newFuncRoot:
+; CHECK: }
 
 define void @f() {
 entry:

diff  --git a/llvm/test/Transforms/HotColdSplit/eh-pads.ll b/llvm/test/Transforms/HotColdSplit/eh-pads.ll
index 415c7e4b2bde3b..ad7baf97f68d0c 100644
--- a/llvm/test/Transforms/HotColdSplit/eh-pads.ll
+++ b/llvm/test/Transforms/HotColdSplit/eh-pads.ll
@@ -84,13 +84,16 @@ cold4:
 ; CHECK: sink
 
 ; CHECK-LABEL: define {{.*}}@bar.cold.1(
+; CHECK: sideeffect(i32 0)
+
+; CHECK-LABEL: define {{.*}}@bar.cold.2(
 ; CHECK: sideeffect(i32 1)
 
 ; CHECK-LABEL: define {{.*}}@baz.cold.1(
-; CHECK: sideeffect(i32 1)
+; CHECK: sideeffect(i32 0)
 
 ; CHECK-LABEL: define {{.*}}@baz.cold.2(
-; CHECK: sideeffect(i32 0)
+; CHECK: sideeffect(i32 1)
 
 declare void @sideeffect(i32)
 

diff  --git a/llvm/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll b/llvm/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll
index 65f8aad4240667..0c055981260b2b 100644
--- a/llvm/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll
+++ b/llvm/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll
@@ -1,10 +1,10 @@
 ; RUN: opt -S -passes=hotcoldsplit -hotcoldsplit-threshold=-1 < %s 2>&1 | FileCheck %s
 
 ; CHECK-LABEL: define {{.*}}@fun
-; CHECK: call {{.*}}@fun.cold.2(
-; CHECK-NEXT: ret void
 ; CHECK: call {{.*}}@fun.cold.1(
 ; CHECK-NEXT: ret void
+; CHECK: call {{.*}}@fun.cold.2(
+; CHECK-NEXT: ret void
 define void @fun() {
 entry:
   br i1 undef, label %A.then, label %A.else
@@ -49,9 +49,10 @@ B.cleanup:
 }
 
 ; CHECK-LABEL: define {{.*}}@fun.cold.1(
-; CHECK: %B.cleanup.dest.slot.0 = phi i32 [ 1, %B.then5 ], [ 0, %B.end ]
+; CHECK: %A.cleanup.dest.slot.0 = phi i32 [ 1, %A.then5 ], [ 0, %A.end ]
 ; CHECK-NEXT: unreachable
 
 ; CHECK-LABEL: define {{.*}}@fun.cold.2(
-; CHECK: %A.cleanup.dest.slot.0 = phi i32 [ 1, %A.then5 ], [ 0, %A.end ]
+; CHECK: %B.cleanup.dest.slot.0 = phi i32 [ 1, %B.then5 ], [ 0, %B.end ]
 ; CHECK-NEXT: unreachable
+

diff  --git a/llvm/test/Transforms/HotColdSplit/outline-inner-region.ll b/llvm/test/Transforms/HotColdSplit/outline-inner-region.ll
new file mode 100644
index 00000000000000..73398bf365ff04
--- /dev/null
+++ b/llvm/test/Transforms/HotColdSplit/outline-inner-region.ll
@@ -0,0 +1,49 @@
+; RUN: opt -S -passes=hotcoldsplit -hotcoldsplit-max-params=1 < %s | FileCheck %s
+
+target datalayout = "E-m:a-p:32:32-i64:64-n32"
+target triple = "powerpc64-ibm-aix7.2.0.0"
+
+define void @foo(i32 %cond) {
+; CHECK-LABEL: define {{.*}}@foo(
+; CHECK:       if.then:
+; CHECK:       br i1 {{.*}}, label %if.then1, label %codeRepl
+; CHECK-LABEL: codeRepl:
+; CHECK-NEXT:  call void @foo.cold.1
+;
+entry:
+  %cond.addr = alloca i32
+  store i32 %cond, ptr %cond.addr
+  %0 = load i32, ptr %cond.addr
+  %tobool = icmp ne i32 %0, 0
+  br i1 %tobool, label %if.then, label %if.end2
+
+if.then:                                          ; preds = %entry
+  %1 = load i32, ptr %cond.addr
+  call void @sink(i32 %0)
+  %cmp = icmp sgt i32 %1, 10
+  br i1 %cmp, label %if.then1, label %if.else
+
+if.then1:                                         ; preds = %if.then
+  call void @sideeffect(i32 2)
+  br label %if.end
+
+if.else:                                          ; preds = %if.then
+  call void @sink(i32 0)
+  call void @sideeffect(i32 0)
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then1
+  br label %if.end2
+
+if.end2:                                          ; preds = %entry
+  call void @sideeffect(i32 1)
+  ret void
+}
+
+; CHECK-LABEL: define {{.*}}@foo.cold.1
+; CHECK:       call {{.*}}@sink
+; CHECK-NEXT:  call {{.*}}@sideeffect
+
+declare void @sideeffect(i32)
+
+declare void @sink(i32) cold

diff  --git a/llvm/test/Transforms/HotColdSplit/outline-outer-region.ll b/llvm/test/Transforms/HotColdSplit/outline-outer-region.ll
new file mode 100644
index 00000000000000..4a3c96982a87b8
--- /dev/null
+++ b/llvm/test/Transforms/HotColdSplit/outline-outer-region.ll
@@ -0,0 +1,52 @@
+; RUN: opt -S -passes=hotcoldsplit -hotcoldsplit-threshold=2 < %s | FileCheck %s
+
+target datalayout = "E-m:a-p:32:32-i64:64-n32"
+target triple = "powerpc64-ibm-aix7.2.0.0"
+
+define void @foo(i32 %cond, i32 %s0, i32 %s1) {
+; CHECK-LABEL: define {{.*}}@foo(
+; CHECK:       br i1 {{.*}}, label %codeRepl, label %if.end2
+; CHECK-LABEL: codeRepl:
+; CHECK-NEXT: call void @foo.cold.1
+; CHECK-LABEL: if.end2:
+; CHECK: call void @sideeffect
+;
+entry:
+  %cond.addr = alloca i32
+  store i32 %cond, ptr %cond.addr
+  %0 = load i32, ptr %cond.addr
+  %tobool = icmp ne i32 %0, 0
+  br i1 %tobool, label %if.then, label %if.end2
+
+if.then:                                          ; preds = %entry
+  %1 = load i32, ptr %cond.addr
+  %cmp = icmp sgt i32 %1, 10
+  br i1 %cmp, label %if.then1, label %if.else
+
+if.then1:                                         ; preds = %if.then
+  call void @sideeffect(i32 0)
+  br label %if.end
+
+if.else:                                          ; preds = %if.then
+  call void @sink(i32 %s0)
+  call void @sideeffect(i32 1)
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then1
+  call void @sink(i32 %0)
+  ret void
+
+if.end2:                                          ; preds = %entry
+  call void @sideeffect(i32 %s1)
+  ret void
+}
+
+; CHECK-LABEL: define {{.*}}@foo.cold.1
+; CHECK: call {{.*}}@sink
+; CHECK: call {{.*}}@sideeffect
+; CHECK: call {{.*}}@sideeffect
+; CHECK: call {{.*}}@sink
+
+declare void @sideeffect(i32)
+
+declare void @sink(i32) cold


        


More information about the llvm-commits mailing list