[llvm] r321236 - [SimplifyCFG] Avoid quadratic on a predecessors number behavior in instruction sinking.

Wed Dec 20 17:22:13 PST 2017

Author: mzolotukhin
Date: Wed Dec 20 17:22:13 2017
New Revision: 321236

URL: http://llvm.org/viewvc/llvm-project?rev=321236&view=rev
Log:
[SimplifyCFG] Avoid behavior quadratic in the number of predecessors in instruction sinking.

If a block has N predecessors, then the current algorithm will try to
sink common code to this block N times (whenever we visit a
predecessor). Every attempt to sink the common code includes going
through all predecessors, so the complexity of the algorithm becomes
O(N^2).
With this patch we try to sink common code only when we visit the block
itself. With this, the complexity goes down to O(N).
As a side effect, the point at which the code is sunk is slightly different
from before (the order of simplifications has changed), which is why I had
to adjust two tests (note that neither of the tests is intended to test
SimplifyCFG):
* test/CodeGen/AArch64/arm64-jumptable.ll - the changes in this test mimic
what the previous implementation of SimplifyCFG would have done.
* test/CodeGen/ARM/avoid-cpsr-rmw.ll - in this test I disabled common
code sinking with a command-line flag.

Modified:
    llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp
    llvm/trunk/test/CodeGen/AArch64/arm64-jumptable.ll
    llvm/trunk/test/CodeGen/ARM/avoid-cpsr-rmw.ll

Modified: llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp?rev=321236&r1=321235&r2=321236&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp Wed Dec 20 17:22:13 2017
@@ -1654,14 +1654,11 @@ namespace {
 
 } // end anonymous namespace
 
-/// Given an unconditional branch that goes to BBEnd,
-/// check whether BBEnd has only two predecessors and the other predecessor
-/// ends with an unconditional branch. If it is true, sink any common code
-/// in the two predecessors to BBEnd.
-static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
-  assert(BI1->isUnconditional());
-  BasicBlock *BBEnd = BI1->getSuccessor(0);
-
+/// Check whether BB's predecessors end with unconditional branches. If it is
+/// true, sink any common code from the predecessors to BB.
+/// We also allow one predecessor to end with conditional branch (but no more
+/// than one).
+static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
   // We support two situations:
   //   (1) all incoming arcs are unconditional
   //   (2) one incoming arc is conditional
@@ -1705,7 +1702,7 @@ static bool SinkThenElseCodeToEnd(Branch
   //
   SmallVector<BasicBlock*,4> UnconditionalPreds;
   Instruction *Cond = nullptr;
-  for (auto *B : predecessors(BBEnd)) {
+  for (auto *B : predecessors(BB)) {
     auto *T = B->getTerminator();
     if (isa<BranchInst>(T) && cast<BranchInst>(T)->isUnconditional())
       UnconditionalPreds.push_back(B);
@@ -1773,8 +1770,7 @@ static bool SinkThenElseCodeToEnd(Branch
     DEBUG(dbgs() << "SINK: Splitting edge\n");
     // We have a conditional edge and we're going to sink some instructions.
     // Insert a new block postdominating all blocks we're going to sink from.
-    if (!SplitBlockPredecessors(BI1->getSuccessor(0), UnconditionalPreds,
-                                ".sink.split"))
+    if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split"))
       // Edges couldn't be split.
       return false;
     Changed = true;
@@ -5728,9 +5724,6 @@ bool SimplifyCFGOpt::SimplifyUncondBranc
   BasicBlock *BB = BI->getParent();
   BasicBlock *Succ = BI->getSuccessor(0);
 
-  if (SinkCommon && Options.SinkCommonInsts && SinkThenElseCodeToEnd(BI))
-    return true;
-
   // If the Terminator is the only non-phi instruction, simplify the block.
   // If LoopHeader is provided, check if the block or its successor is a loop
   // header. (This is for early invocations before loop simplify and
@@ -6008,6 +6001,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB)
   if (MergeBlockIntoPredecessor(BB))
     return true;
 
+  if (SinkCommon && Options.SinkCommonInsts)
+    Changed |= SinkCommonCodeFromPredecessors(BB);
+
   IRBuilder<> Builder(BB);
 
   // If there is a trivial two-entry PHI node in this basic block, and we can

Modified: llvm/trunk/test/CodeGen/AArch64/arm64-jumptable.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-jumptable.ll?rev=321236&r1=321235&r2=321236&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-jumptable.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-jumptable.ll Wed Dec 20 17:22:13 2017
@@ -6,22 +6,20 @@ define void @sum(i32 %a, i32* %to, i32 %
 entry:
   switch i32 %a, label %exit [
     i32 1, label %bb1
-    i32 2, label %bb2
+    i32 2, label %exit.sink.split
     i32 3, label %bb3
     i32 4, label %bb4
   ]
 bb1:
   %b = add i32 %c, 1
-  store i32 %b, i32* %to
-  br label %exit
-bb2:
-  store i32 2, i32* %to
-  br label %exit
+  br label %exit.sink.split
 bb3:
-  store i32 3, i32* %to
-  br label %exit
+  br label %exit.sink.split
 bb4:
-  store i32 5, i32* %to
+  br label %exit.sink.split
+exit.sink.split:
+  %.sink = phi i32 [ 5, %bb4 ], [ %b, %bb1 ], [ 3, %bb3 ], [ %a, %entry ]
+  store i32 %.sink, i32* %to
   br label %exit
 exit:
   ret void

Modified: llvm/trunk/test/CodeGen/ARM/avoid-cpsr-rmw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/avoid-cpsr-rmw.ll?rev=321236&r1=321235&r2=321236&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/avoid-cpsr-rmw.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/avoid-cpsr-rmw.ll Wed Dec 20 17:22:13 2017
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CORTEX
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=swift     | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SWIFT
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 -simplifycfg-sink-common=false | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CORTEX
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=swift     -simplifycfg-sink-common=false | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SWIFT
 ; Avoid some 's' 16-bit instruction which partially update CPSR (and add false
 ; dependency) when it isn't dependent on last CPSR defining instruction.
 ; rdar://8928208