[llvm] r316711 - [CGP] Merge empty case blocks if no extra moves are added.

Balaram Makam via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 26 15:34:01 PDT 2017


Author: bmakam
Date: Thu Oct 26 15:34:01 2017
New Revision: 316711

URL: http://llvm.org/viewvc/llvm-project?rev=316711&view=rev
Log:
[CGP] Merge empty case blocks if no extra moves are added.

Summary:
Currently we skip merging when extra moves may be added in the header of switch instead of the case block, if the case block is used as an incoming
block of a PHI. If all the incoming values of the PHIs are non-constants and the destination block is dominated by the switch block then extra moves are likely not added by ISel, so there is no need to skip merging in this case.

Reviewers: efriedma, junbuml, davidxl, hfinkel, qcolombet

Reviewed By: efriedma

Subscribers: dberlin, kuhar, mcrosier, llvm-commits

Differential Revision: https://reviews.llvm.org/D37343

Modified:
    llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
    llvm/trunk/test/Transforms/CodeGenPrepare/skip-merging-case-block.ll

Modified: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp?rev=316711&r1=316710&r2=316711&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp (original)
+++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp Thu Oct 26 15:34:01 2017
@@ -212,6 +212,7 @@ class TypePromotionTransaction;
     const TargetTransformInfo *TTI = nullptr;
     const TargetLibraryInfo *TLInfo;
     const LoopInfo *LI;
+    DominatorTree *DT;
     std::unique_ptr<BlockFrequencyInfo> BFI;
     std::unique_ptr<BranchProbabilityInfo> BPI;
 
@@ -262,6 +263,7 @@ class TypePromotionTransaction;
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       // FIXME: When we can selectively preserve passes, preserve the domtree.
+      AU.addRequired<DominatorTreeWrapperPass>();
       AU.addRequired<ProfileSummaryInfoWrapperPass>();
       AU.addRequired<TargetLibraryInfoWrapperPass>();
       AU.addRequired<TargetTransformInfoWrapperPass>();
@@ -343,6 +345,8 @@ bool CodeGenPrepare::runOnFunction(Funct
   TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
   OptSize = F.optForSize();
 
   if (ProfileGuidedSectionPrefix) {
@@ -755,6 +759,11 @@ bool CodeGenPrepare::isMergingEmptyBlock
     return true;
 
   SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
+  SmallVector<PHINode *, 16> PNs;
+
+  for (auto DestBBI = DestBB->begin();
+       auto *DestPN = dyn_cast<PHINode>(&*DestBBI); ++DestBBI)
+    PNs.push_back(DestPN);
 
   // Find all other incoming blocks from which incoming values of all PHIs in
   // DestBB are the same as the ones from BB.
@@ -764,16 +773,10 @@ bool CodeGenPrepare::isMergingEmptyBlock
     if (DestBBPred == BB)
       continue;
 
-    bool HasAllSameValue = true;
-    BasicBlock::const_iterator DestBBI = DestBB->begin();
-    while (const PHINode *DestPN = dyn_cast<PHINode>(DestBBI++)) {
-      if (DestPN->getIncomingValueForBlock(BB) !=
-          DestPN->getIncomingValueForBlock(DestBBPred)) {
-        HasAllSameValue = false;
-        break;
-      }
-    }
-    if (HasAllSameValue)
+    if (llvm::all_of(PNs, [&](PHINode *PN) {
+      return (PN->getIncomingValueForBlock(BB) ==
+              PN->getIncomingValueForBlock(DestBBPred));
+        }))
       SameIncomingValueBBs.insert(DestBBPred);
   }
 
@@ -783,6 +786,14 @@ bool CodeGenPrepare::isMergingEmptyBlock
   if (SameIncomingValueBBs.count(Pred))
     return true;
 
+  // Check to see if none of the phis have constant incoming values for BB and
+  // Pred dominates DestBB, in such case extra COPYs are likely not added, so
+  // there is no reason to skip merging.
+  if (DT->dominates(Pred, DestBB) && llvm::none_of(PNs, [BB](PHINode *PN) {
+        return isa<Constant>(PN->getIncomingValueForBlock(BB));
+      }))
+    return true;
+
   if (!BFI) {
     Function &F = *BB->getParent();
     LoopInfo LI{DominatorTree(F)};
@@ -885,7 +896,7 @@ void CodeGenPrepare::eliminateMostlyEmpt
       // Remember if SinglePred was the entry block of the function.  If so, we
       // will need to move BB back to the entry position.
       bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
-      MergeBasicBlockIntoOnlyPred(DestBB, nullptr);
+      MergeBasicBlockIntoOnlyPred(DestBB, DT);
 
       if (isEntry && BB != &BB->getParent()->getEntryBlock())
         BB->moveBefore(&BB->getParent()->getEntryBlock());
@@ -926,7 +937,21 @@ void CodeGenPrepare::eliminateMostlyEmpt
 
   // The PHIs are now updated, change everything that refers to BB to use
   // DestBB and remove BB.
+  SmallVector<DominatorTree::UpdateType, 3> Updates;
+  for (auto *PredBB : predecessors(BB)) {
+    if (PredBB == BB)
+      continue;
+    DominatorTree::UpdateType UT = {DominatorTree::Delete, PredBB, BB};
+    if (!is_contained(Updates, UT)) {
+      Updates.push_back(UT);
+      if (PredBB != DestBB)
+        Updates.push_back({DominatorTree::Insert, PredBB, DestBB});
+    }
+  }
   BB->replaceAllUsesWith(DestBB);
+  DT->applyUpdates(Updates);
+  BB->getTerminator()->eraseFromParent();
+  DT->deleteEdge(BB, DestBB);
   BB->eraseFromParent();
   ++NumBlocksElim;
 

Modified: llvm/trunk/test/Transforms/CodeGenPrepare/skip-merging-case-block.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/skip-merging-case-block.ll?rev=316711&r1=316710&r2=316711&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/skip-merging-case-block.ll (original)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/skip-merging-case-block.ll Thu Oct 26 15:34:01 2017
@@ -143,11 +143,107 @@ declare void @calldefault(...) local_unn
 !1 = !{!"branch_weights", i32 1 , i32 5, i32 1,i32 1, i32 1}
 !2 = !{!"branch_weights", i32 1 , i32 4, i32 1,i32 1, i32 1}
 
+; while.cond does not dominate return, expect to skip merging empty block
+; return.loopexit into return.
+ at b = external global i32, align 4
+ at a = external global i32*, align 8
+
+define void @f_switch4(i32 %i) local_unnamed_addr personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @f_switch4
+entry:
+  %0 = load i32, i32* @b, align 4
+  %cond = icmp eq i32 %0, 6
+  br i1 %cond, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+  %add = add i32 %i, 2
+  %1 = load i32*, i32** @a, align 8
+  %magicptr = ptrtoint i32* %1 to i32
+  br label %while.cond
+
+; CHECK-LABEL: while.cond:
+; CHECK: i32 0, label %return.loopexit
+; CHECK: i32 47, label %return.loopexit
+while.cond:                                       ; preds = %while.cond.backedge, %if.end
+  switch i32 %magicptr, label %while.cond.if.end10_crit_edge [
+    i32 0, label %return.loopexit
+    i32 47, label %return.loopexit
+  ]
+
+while.cond.if.end10_crit_edge:                    ; preds = %while.cond
+  br label %while.cond.backedge
+
+while.cond.backedge:                              ; preds = %while.cond.if.end10_crit_edge, %if.then9
+  br label %while.cond
+
+return.loopexit:                                  ; preds = %while.cond
+  br label %return
+
+; CHECK_LABEL: return:
+; CHECK: %{{.*}} = phi i32 [ 0, %entry ], [ %add, %return.loopexit ]
+return:                                           ; preds = %return.loopexit, %entry
+  %retval.4 = phi i32 [ 0, %entry ], [ %add, %return.loopexit ]
+  ret void
+}
+declare i32 @__gxx_personality_v0(...)
+
+; Expect to merge empty block while.cond2.loopexit into while.cond2
+define i32 @f_switch5(i32 %i) local_unnamed_addr {
+; CHECK-LABEL: @f_switch5
+entry:
+  %0 = load i32, i32* @b, align 4
+  %cond = icmp eq i32 %0, 6
+  br i1 %cond, label %while.cond.preheader, label %sw.epilog
+
+while.cond.preheader:                             ; preds = %entry
+  %1 = load i32*, i32** @a, align 8
+  %magicptr = ptrtoint i32* %1 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %1, i64 1
+  br label %while.cond
+
+; CHECK-LABEL: while.cond:
+; CHECK: i64 32, label %while.cond2
+; CHECK: i64 0, label %while.cond2
+while.cond:                                       ; preds = %land.rhs, %while.cond.preheader
+  switch i64 %magicptr, label %land.rhs [
+    i64 32, label %while.cond2.loopexit
+    i64 0, label %while.cond2.loopexit
+  ]
+
+land.rhs:                                         ; preds = %while.cond
+  %2 = load i32, i32* %arrayidx, align 4
+  %tobool1 = icmp eq i32 %2, 0
+  br i1 %tobool1, label %while.cond2thread-pre-split.loopexit, label %while.cond
+
+while.cond2thread-pre-split.loopexit:             ; preds = %land.rhs
+  br label %while.cond2thread-pre-split
+
+while.cond2thread-pre-split:                      ; preds = %while.body4, %while.cond2thread-pre-split.loopexit
+  %.pr = phi i32* [ %.pr.pre, %while.body4 ], [ %1, %while.cond2thread-pre-split.loopexit ]
+  br label %while.cond2
+
+while.cond2.loopexit:                             ; preds = %while.cond, %while.cond
+  br label %while.cond2
+
+; CHECK-LABEL: while.cond2:
+; CHECK: %{{.*}} = phi i32* [ %.pr.pre, %while.body4 ], [ %1, %land.rhs ], [ %1, %while.cond ], [ %1, %while.cond ]
+while.cond2:                                      ; preds = %while.cond2.loopexit, %while.cond2thread-pre-split
+  %3 = phi i32* [ %.pr, %while.cond2thread-pre-split ], [ %1, %while.cond2.loopexit ]
+  %tobool3 = icmp eq i32* %3, null
+  br i1 %tobool3, label %sw.epilog, label %while.body4
+
+while.body4:                                      ; preds = %while.cond2
+  tail call void bitcast (void (...)* @fn2 to void ()*)()
+  %.pr.pre = load i32*, i32** @a, align 8
+  br label %while.cond2thread-pre-split
+
+sw.epilog:                                        ; preds = %while.cond2, %entry
+  ret i32 undef
+}
+
 
 ; This test that BFI/BPI is created without any assertion in isMergingEmptyBlockProfitable()
 ; in the case where empty blocks are removed before creating BFI/BPI.
- at b = common global i32 0, align 4
- at a = common global i32* null, align 8
 define i32 @should_not_assert(i32 %i) local_unnamed_addr {
 entry:
   %0 = load i32, i32* @b, align 4




More information about the llvm-commits mailing list