[llvm] cc63203 - [SimplifyCFG] Common code sinking: fix application of profitability check

Thu Apr 29 11:11:53 PDT 2021

Author: Roman Lebedev
Date: 2021-04-29T21:11:40+03:00
New Revision: cc63203908daa3a844d09160375c191003ab970c

URL: https://github.com/llvm/llvm-project/commit/cc63203908daa3a844d09160375c191003ab970c
DIFF: https://github.com/llvm/llvm-project/commit/cc63203908daa3a844d09160375c191003ab970c.diff

LOG: [SimplifyCFG] Common code sinking: fix application of profitability check

The profitability check is: we don't want to create more than a single PHI
per instruction sunk. We need to create the PHI unless we'll sink
all of it's would-be incoming values.

But there is a caveat there.
This profitability check doesn't converge on the first iteration!
If we first decide that we want to sink 10 instructions,
but then determine that 5'th one is unprofitable to sink,
that may result in us not sinking some instructions that
resulted in determining that some other instruction
we've determined to be profitable to sink becoming unprofitable.

So we need to iterate until we converge, as in determine
that all leftover instructions are profitable to sink.

But, the direct approach of just re-iterating seems dumb,
because in the worst case we'd find that the last instruction
is unprofitable, which would result in revisiting instructions
many many times.

Instead, i think we can get away with just two passes - forward and backward.
However then it isn't obvious what is the most performant way to update
InstructionsToSink.

Added: 
    

Modified: 
    llvm/lib/Transforms/Utils/SimplifyCFG.cpp
    llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 2369935236a6c..31c50f459898a 100644

--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1933,6 +1933,20 @@ namespace {
       }
     }
 
+    void operator++() {
+      if (Fail)
+        return;
+      for (auto *&Inst : Insts) {
+        for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
+          Inst = Inst->getNextNode();
+        // Already at end of block.
+        if (!Inst) {
+          Fail = true;
+          return;
+        }
+      }
+    }
+
     ArrayRef<Instruction*> operator * () const {
       return Insts;
     }
@@ -2005,7 +2019,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
   // carry on. If we can sink an instruction but need to PHI-merge some operands
   // (because they're not identical in each instruction) we add these to
   // PHIOperands.
-  unsigned ScanIdx = 0;
+  int ScanIdx = 0;
   SmallPtrSet<Value*,4> InstructionsToSink;
   DenseMap<Instruction*, SmallVector<Value*,4>> PHIOperands;
   LockstepReverseIterator LRI(UnconditionalPreds);
@@ -2041,8 +2055,24 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
 
     return NumPHIInsts <= 1;
   };
+
+  // If no instructions can be sunk, early-return.
+  if (ScanIdx == 0)
+    return false;
+
+  // We've determined that we are going to sink last ScanIdx instructions,
+  // and recorded them in InstructionsToSink. Now, some instructions may be
+  // unprofitable to sink. But that determination depends on the instructions
+  // that we are going to sink.
+
+  // First, forward scan: find the first instruction unprofitable to sink,
+  // recording all the ones that are profitable to sink.
+  // FIXME: would it be better, after we detect that not all are profitable.
+  // to either record the profitable ones, or erase the unprofitable ones?
+  // Maybe we need to choose (at runtime) the one that will touch least instrs?
   LRI.reset();
-  unsigned Idx = 0;
+  int Idx = 0;
+  SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
   while (Idx < ScanIdx) {
     if (!ProfitableToSinkInstruction(LRI)) {
       // Too many PHIs would be created.
@@ -2050,10 +2080,43 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
           dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
       break;
     }
+    InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end());
     --LRI;
     ++Idx;
   }
-  ScanIdx = Idx;
+
+  // If no instructions can be sunk, early-return.
+  if (Idx == 0)
+    return false;
+
+  // Did we determine that (only) some instructions are unprofitable to sink?
+  if (Idx < ScanIdx) {
+    // Okay, some instructions are unprofitable.
+    ScanIdx = Idx;
+    InstructionsToSink = InstructionsProfitableToSink;
+
+    // But, that may make other instructions unprofitable, too.
+    // So, do a backward scan, do any earlier instructions become unprofitable?
+    assert(!ProfitableToSinkInstruction(LRI) &&
+           "We already know that the last instruction is unprofitable to sink");
+    ++LRI;
+    --Idx;
+    while (Idx >= 0) {
+      // If we detect that an instruction becomes unprofitable to sink,
+      // all earlier instructions won't be sunk either,
+      // so preemptively keep InstructionsProfitableToSink in sync.
+      // FIXME: is this the most performant approach?
+      for (auto *I : *LRI)
+        InstructionsProfitableToSink.erase(I);
+      if (!ProfitableToSinkInstruction(LRI)) {
+        // Everything starting with this instruction won't be sunk.
+        ScanIdx = Idx;
+        InstructionsToSink = InstructionsProfitableToSink;
+      }
+      ++LRI;
+      --Idx;
+    }
+  }
 
   // If no instructions can be sunk, early-return.
   if (ScanIdx == 0)
@@ -2068,7 +2131,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
     // don't do it unless we'd sink at least one non-speculatable instruction.
     // See https://bugs.llvm.org/show_bug.cgi?id=30244
     LRI.reset();
-    unsigned Idx = 0;
+    int Idx = 0;
     bool Profitable = false;
     while (Idx < ScanIdx) {
       if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
@@ -2102,7 +2165,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
   // sink presuming a later value will also be sunk, but stop half way through
   // and never actually sink it which means we produce more PHIs than intended.
   // This is unlikely in practice though.
-  unsigned SinkIdx = 0;
+  int SinkIdx = 0;
   for (; SinkIdx != ScanIdx; ++SinkIdx) {
     LLVM_DEBUG(dbgs() << "SINK: Sink: "
                       << *UnconditionalPreds[0]->getTerminator()->getPrevNode()

diff  --git a/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll b/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll
index a85ad8e69a20b..c99668264c6cb 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll
@@ -1496,17 +1496,17 @@ define void @creating_too_many_phis(i1 %cond, i32 %a, i32 %b, i32 %c, i32 %d, i3
 ; CHECK-NEXT:    [[V0:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[V1:%.*]] = add i32 [[V0]], [[C:%.*]]
 ; CHECK-NEXT:    [[V2:%.*]] = add i32 [[D:%.*]], [[E:%.*]]
+; CHECK-NEXT:    [[R3:%.*]] = add i32 [[V1]], [[V2]]
 ; CHECK-NEXT:    br label [[END:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[V4:%.*]] = add i32 [[A]], [[B]]
 ; CHECK-NEXT:    [[V5:%.*]] = add i32 [[V4]], [[C]]
 ; CHECK-NEXT:    [[V6:%.*]] = add i32 [[G:%.*]], [[H:%.*]]
+; CHECK-NEXT:    [[R7:%.*]] = add i32 [[V5]], [[V6]]
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[V6_SINK:%.*]] = phi i32 [ [[V6]], [[BB1]] ], [ [[V2]], [[BB0]] ]
-; CHECK-NEXT:    [[V5_SINK:%.*]] = phi i32 [ [[V5]], [[BB1]] ], [ [[V1]], [[BB0]] ]
-; CHECK-NEXT:    [[R7:%.*]] = add i32 [[V5_SINK]], [[V6_SINK]]
-; CHECK-NEXT:    call void @use32(i32 [[R7]])
+; CHECK-NEXT:    [[R7_SINK:%.*]] = phi i32 [ [[R7]], [[BB1]] ], [ [[R3]], [[BB0]] ]
+; CHECK-NEXT:    call void @use32(i32 [[R7_SINK]])
 ; CHECK-NEXT:    ret void
 ;
   br i1 %cond, label %bb0, label %bb1