[llvm] 4038105 - [InstCombine] Sink pure instructions down to return and unreachable blocks

Max Kazantsev via llvm-commits llvm-commits at lists.llvm.org
Fri May 22 01:04:49 PDT 2020


Author: Max Kazantsev
Date: 2020-05-22T14:33:42+07:00
New Revision: 403810557be79e36d0153a04fefff4d72028b2b4

URL: https://github.com/llvm/llvm-project/commit/403810557be79e36d0153a04fefff4d72028b2b4
DIFF: https://github.com/llvm/llvm-project/commit/403810557be79e36d0153a04fefff4d72028b2b4.diff

LOG: [InstCombine] Sink pure instructions down to return and unreachable blocks

If the only user of `Instr` is in a return or unreachable block, we can
sink `Instr` to the`User` safely (unless it reads/writes memory).
Return or unreachable blocks are guaranteed to execute zero
or one time, and `Instr` always dominates `User`, so they either will
be executed together (execution of `User` always implies execution
of `Instr`) or not executed at all.

Differential Revision: https://reviews.llvm.org/D80120
Reviewed By: asbirlea, jdoerfert

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
    llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
    llvm/test/Transforms/InstCombine/overflow.ll
    llvm/test/Transforms/InstCombine/sink_to_unreachable.ll
    llvm/test/Transforms/PGOProfile/chr.ll
    llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 247bebd48932..14076ab78e8e 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3302,6 +3302,11 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
   // We can only sink load instructions if there is nothing between the load and
   // the end of block that could change the value.
   if (I->mayReadFromMemory()) {
+    // We don't want to do any sophisticated alias analysis, so we only check
+    // the instructions after I in I's parent block if we try to sink to its
+    // successor block.
+    if (DestBlock->getUniquePredecessor() != I->getParent())
+      return false;
     for (BasicBlock::iterator Scan = I->getIterator(),
                               E = I->getParent()->end();
          Scan != E; ++Scan)
@@ -3419,7 +3424,8 @@ bool InstCombiner::run() {
       }
     }
 
-    // See if we can trivially sink this instruction to a successor basic block.
+    // See if we can trivially sink this instruction to its user if we can
+    // prove that the successor is not executed more frequently than our block.
     if (EnableCodeSinking)
       if (Use *SingleUse = I->getSingleUndroppableUse()) {
         BasicBlock *BB = I->getParent();
@@ -3435,7 +3441,20 @@ bool InstCombiner::run() {
         if (UserParent != BB) {
           // See if the user is one of our successors that has only one
           // predecessor, so that we don't have to split the critical edge.
-          if (UserParent->getUniquePredecessor() == BB) {
+          bool ShouldSink = UserParent->getUniquePredecessor() == BB;
+          // Another option where we can sink is a block that ends with a
+          // terminator that does not pass control to other block (such as
+          // return or unreachable). In this case:
+          //   - I dominates the User (by SSA form);
+          //   - the User will be executed at most once.
+          // So sinking I down to User is always profitable or neutral.
+          if (!ShouldSink) {
+            auto *Term = UserParent->getTerminator();
+            ShouldSink = isa<ReturnInst>(Term) || isa<UnreachableInst>(Term);
+          }
+          if (ShouldSink) {
+            assert(DT.dominates(BB, UserParent) &&
+                   "Dominance relation broken?");
             // Okay, the CFG is simple enough, try to sink this instruction.
             if (TryToSinkInstruction(I, UserParent)) {
               LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');

diff  --git a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
index d29bcc7d4172..53c82702c9eb 100644
--- a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
@@ -203,7 +203,6 @@ define double @pr26354(<2 x double>* %tmp, i1 %B) {
 ; CHECK-LABEL: @pr26354(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LD:%.*]] = load <2 x double>, <2 x double>* [[TMP:%.*]], align 16
-; CHECK-NEXT:    [[E1:%.*]] = extractelement <2 x double> [[LD]], i32 0
 ; CHECK-NEXT:    br i1 [[B:%.*]], label [[IF:%.*]], label [[END:%.*]]
 ; CHECK:       if:
 ; CHECK-NEXT:    [[E2:%.*]] = extractelement <2 x double> [[LD]], i32 1
@@ -211,6 +210,7 @@ define double @pr26354(<2 x double>* %tmp, i1 %B) {
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
 ; CHECK-NEXT:    [[PH:%.*]] = phi <4 x double> [ undef, [[ENTRY:%.*]] ], [ [[I1]], [[IF]] ]
+; CHECK-NEXT:    [[E1:%.*]] = extractelement <2 x double> [[LD]], i32 0
 ; CHECK-NEXT:    [[E3:%.*]] = extractelement <4 x double> [[PH]], i32 1
 ; CHECK-NEXT:    [[MU:%.*]] = fmul double [[E1]], [[E3]]
 ; CHECK-NEXT:    ret double [[MU]]

diff  --git a/llvm/test/Transforms/InstCombine/overflow.ll b/llvm/test/Transforms/InstCombine/overflow.ll
index f5558890d138..6205a02776cc 100644
--- a/llvm/test/Transforms/InstCombine/overflow.ll
+++ b/llvm/test/Transforms/InstCombine/overflow.ll
@@ -8,13 +8,13 @@ define i32 @test1(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SADD:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[B:%.*]], i32 [[A:%.*]])
-; CHECK-NEXT:    [[SADD_RESULT:%.*]] = extractvalue { i32, i1 } [[SADD]], 0
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractvalue { i32, i1 } [[SADD]], 1
 ; CHECK-NEXT:    br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    tail call void @throwAnExceptionOrWhatever() #2
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
+; CHECK-NEXT:    [[SADD_RESULT:%.*]] = extractvalue { i32, i1 } [[SADD]], 0
 ; CHECK-NEXT:    ret i32 [[SADD_RESULT]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/InstCombine/sink_to_unreachable.ll b/llvm/test/Transforms/InstCombine/sink_to_unreachable.ll
index 600e9388474c..1adcd2618a25 100644
--- a/llvm/test/Transforms/InstCombine/sink_to_unreachable.ll
+++ b/llvm/test/Transforms/InstCombine/sink_to_unreachable.ll
@@ -3,6 +3,7 @@
 ; RUN: opt -passes=instcombine -S < %s | FileCheck %s
 
 declare void @use(i32 %x)
+declare i1 @cond()
 
 define void @test_01(i32 %x, i32 %y) {
 ; CHECK-LABEL: @test_01(
@@ -33,20 +34,18 @@ exit:
 }
 
 
-; TODO: %comparator and %signed can be sunk down to unreachable just as in
-; test above.
 define void @test_02(i32 %x, i32 %y) {
 ; CHECK-LABEL: @test_02(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[C1:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[C2:%.*]] = icmp slt i32 [[X]], [[Y]]
-; CHECK-NEXT:    [[SIGNED:%.*]] = select i1 [[C2]], i32 -1, i32 1
-; CHECK-NEXT:    [[COMPARATOR:%.*]] = select i1 [[C1]], i32 0, i32 [[SIGNED]]
+; CHECK-NEXT:    [[C2:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    br i1 [[C2]], label [[EXIT:%.*]], label [[MEDIUM:%.*]]
 ; CHECK:       medium:
 ; CHECK-NEXT:    [[C3:%.*]] = icmp sgt i32 [[X]], [[Y]]
 ; CHECK-NEXT:    br i1 [[C3]], label [[EXIT]], label [[UNREACHED:%.*]]
 ; CHECK:       unreached:
+; CHECK-NEXT:    [[C1:%.*]] = icmp eq i32 [[X]], [[Y]]
+; CHECK-NEXT:    [[SIGNED:%.*]] = select i1 [[C2]], i32 -1, i32 1
+; CHECK-NEXT:    [[COMPARATOR:%.*]] = select i1 [[C1]], i32 0, i32 [[SIGNED]]
 ; CHECK-NEXT:    call void @use(i32 [[COMPARATOR]])
 ; CHECK-NEXT:    unreachable
 ; CHECK:       exit:
@@ -70,3 +69,92 @@ unreached:
 exit:
   ret void
 }
+
+define i32 @test_03(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_03(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C2:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C2]], label [[EXIT:%.*]], label [[MEDIUM:%.*]]
+; CHECK:       medium:
+; CHECK-NEXT:    [[C3:%.*]] = icmp sgt i32 [[X]], [[Y]]
+; CHECK-NEXT:    br i1 [[C3]], label [[EXIT]], label [[UNREACHED:%.*]]
+; CHECK:       unreached:
+; CHECK-NEXT:    [[C1:%.*]] = icmp eq i32 [[X]], [[Y]]
+; CHECK-NEXT:    [[SIGNED:%.*]] = select i1 [[C2]], i32 -1, i32 1
+; CHECK-NEXT:    [[COMPARATOR:%.*]] = select i1 [[C1]], i32 0, i32 [[SIGNED]]
+; CHECK-NEXT:    ret i32 [[COMPARATOR]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %c1 = icmp eq i32 %x, %y
+  %c2 = icmp slt i32 %x, %y
+  %signed = select i1 %c2, i32 -1, i32 1
+  %comparator = select i1 %c1, i32 0, i32 %signed
+  br i1 %c2, label %exit, label %medium
+
+medium:
+  %c3 = icmp sgt i32 %x, %y
+  br i1 %c3, label %exit, label %unreached
+
+unreached:
+  ret i32 %comparator
+
+exit:
+  ret i32 0
+}
+
+define i32 @test_04(i32 %x, i1 %c) {
+; CHECK-LABEL: @test_04(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[P:%.*]] = phi i32 [ 0, [[BB1]] ], [ 1, [[BB2]] ]
+; CHECK-NEXT:    [[A:%.*]] = add i32 [[X:%.*]], 1
+; CHECK-NEXT:    [[R:%.*]] = add i32 [[P]], [[A]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+bb0:
+  %a = add i32 %x, 1
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  br label %bb3
+bb2:
+  br label %bb3
+bb3:
+  %p = phi i32 [0, %bb1], [1, %bb2]
+  %r = add i32 %p, %a
+  ret i32 %r
+}
+
+; Do not sink into a potentially hotter block.
+define i32 @test_05_neg(i32 %x, i1 %cond) {
+; CHECK-LABEL: @test_05_neg(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    [[A:%.*]] = add i32 [[X:%.*]], 1
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[CALL:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[CALL]], label [[BB2]], label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[P:%.*]] = phi i32 [ 0, [[BB1]] ], [ [[A]], [[BB2]] ]
+; CHECK-NEXT:    ret i32 [[P]]
+;
+bb0:
+  %a = add i32 %x, 1
+  br i1 %cond, label %bb1, label %bb2
+bb1:
+  br label %bb3
+bb2:
+  %call = call i1 @cond()
+  br i1 %call, label %bb2, label %bb3
+bb3:
+  %p = phi i32 [0, %bb1], [%a, %bb2]
+  ret i32 %p
+}

diff  --git a/llvm/test/Transforms/PGOProfile/chr.ll b/llvm/test/Transforms/PGOProfile/chr.ll
index 5ab6d97b8ca5..9bedb87834f6 100644
--- a/llvm/test/Transforms/PGOProfile/chr.ll
+++ b/llvm/test/Transforms/PGOProfile/chr.ll
@@ -796,10 +796,6 @@ define i32 @test_chr_7_1(i32* %i, i32* %j, i32 %sum0) !prof !14 {
 ; CHECK-LABEL: @test_chr_7_1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
-; CHECK-NEXT:    [[V3:%.*]] = and i32 [[I0]], 2
-; CHECK-NEXT:    [[V4:%.*]] = icmp eq i32 [[V3]], 0
-; CHECK-NEXT:    [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
-; CHECK-NEXT:    [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16
 ; CHECK-NEXT:    call void @foo()
 ; CHECK-NEXT:    [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[J0]], 12
@@ -824,6 +820,10 @@ define i32 @test_chr_7_1(i32* %i, i32* %j, i32 %sum0) !prof !14 {
 ; CHECK-NEXT:    call void @foo()
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
+; CHECK-NEXT:    [[V3:%.*]] = and i32 [[I0]], 2
+; CHECK-NEXT:    [[V4:%.*]] = icmp eq i32 [[V3]], 0
+; CHECK-NEXT:    [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
+; CHECK-NEXT:    [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16
 ; CHECK-NEXT:    ret i32 [[SUM2]]
 ;
 entry:
@@ -1381,8 +1381,6 @@ define i32 @test_chr_15(i32* %i, i32* %j, i32 %sum0, i1 %pred, i32 %z) !prof !14
 ; CHECK-NEXT:    [[V4:%.*]] = icmp eq i32 [[V6]], [[J0]]
 ; CHECK-NEXT:    [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
 ; CHECK-NEXT:    [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16
-; CHECK-NEXT:    [[V5:%.*]] = icmp eq i32 [[I0]], [[SUM2]]
-; CHECK-NEXT:    [[SUM3:%.*]] = select i1 [[V5]], i32 [[SUM2]], i32 [[V8]], !prof !16
 ; CHECK-NEXT:    call void @foo()
 ; CHECK-NEXT:    [[V9:%.*]] = and i32 [[I0]], 4
 ; CHECK-NEXT:    [[V10:%.*]] = icmp eq i32 [[V9]], 0
@@ -1391,6 +1389,8 @@ define i32 @test_chr_15(i32* %i, i32* %j, i32 %sum0, i1 %pred, i32 %z) !prof !14
 ; CHECK-NEXT:    call void @foo()
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
+; CHECK-NEXT:    [[V5:%.*]] = icmp eq i32 [[I0]], [[SUM2]]
+; CHECK-NEXT:    [[SUM3:%.*]] = select i1 [[V5]], i32 [[SUM2]], i32 [[V8]], !prof !16
 ; CHECK-NEXT:    [[V11:%.*]] = add i32 [[I0]], [[SUM3]]
 ; CHECK-NEXT:    ret i32 [[V11]]
 ;

diff  --git a/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll b/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll
index e9a3e608ea29..3439830d6265 100644
--- a/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll
+++ b/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll
@@ -273,7 +273,6 @@ define i32 @test_diamond_simple(i32* %p, i32* %q, i32 %a, i32 %b) {
 ; CHECK-NEXT:    [[X1:%.*]] = icmp eq i32 [[A:%.*]], 0
 ; CHECK-NEXT:    [[Z2:%.*]] = select i1 [[X1]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    [[X2:%.*]] = icmp eq i32 [[B]], 0
-; CHECK-NEXT:    [[Z4:%.*]] = select i1 [[X2]], i32 [[Z2]], i32 3
 ; CHECK-NEXT:    [[TMP0:%.*]] = or i32 [[A]], [[B]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP3:%.*]], label [[TMP2:%.*]]
@@ -282,6 +281,7 @@ define i32 @test_diamond_simple(i32* %p, i32* %q, i32 %a, i32 %b) {
 ; CHECK-NEXT:    store i32 [[SIMPLIFYCFG_MERGE]], i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    br label [[TMP3]]
 ; CHECK:       3:
+; CHECK-NEXT:    [[Z4:%.*]] = select i1 [[X2]], i32 [[Z2]], i32 3
 ; CHECK-NEXT:    ret i32 [[Z4]]
 ;
 entry:


        


More information about the llvm-commits mailing list