[llvm] 4038105 - [InstCombine] Sink pure instructions down to return and unreachable blocks
Max Kazantsev via llvm-commits
llvm-commits at lists.llvm.org
Fri May 22 01:04:49 PDT 2020
Author: Max Kazantsev
Date: 2020-05-22T14:33:42+07:00
New Revision: 403810557be79e36d0153a04fefff4d72028b2b4
URL: https://github.com/llvm/llvm-project/commit/403810557be79e36d0153a04fefff4d72028b2b4
DIFF: https://github.com/llvm/llvm-project/commit/403810557be79e36d0153a04fefff4d72028b2b4.diff
LOG: [InstCombine] Sink pure instructions down to return and unreachable blocks
If the only user of `Instr` is in a return or unreachable block, we can
sink `Instr` to the `User` safely (unless it reads/writes memory).
Return or unreachable blocks are guaranteed to execute zero
or one time, and `Instr` always dominates `User`, so they either will
be executed together (execution of `User` always implies execution
of `Instr`) or not executed at all.
Differential Revision: https://reviews.llvm.org/D80120
Reviewed By: asbirlea, jdoerfert
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
llvm/test/Transforms/InstCombine/overflow.ll
llvm/test/Transforms/InstCombine/sink_to_unreachable.ll
llvm/test/Transforms/PGOProfile/chr.ll
llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 247bebd48932..14076ab78e8e 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3302,6 +3302,11 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
// We can only sink load instructions if there is nothing between the load and
// the end of block that could change the value.
if (I->mayReadFromMemory()) {
+ // We don't want to do any sophisticated alias analysis, so we only check
+ // the instructions after I in I's parent block if we try to sink to its
+ // successor block.
+ if (DestBlock->getUniquePredecessor() != I->getParent())
+ return false;
for (BasicBlock::iterator Scan = I->getIterator(),
E = I->getParent()->end();
Scan != E; ++Scan)
@@ -3419,7 +3424,8 @@ bool InstCombiner::run() {
}
}
- // See if we can trivially sink this instruction to a successor basic block.
+ // See if we can trivially sink this instruction to its user if we can
+ // prove that the successor is not executed more frequently than our block.
if (EnableCodeSinking)
if (Use *SingleUse = I->getSingleUndroppableUse()) {
BasicBlock *BB = I->getParent();
@@ -3435,7 +3441,20 @@ bool InstCombiner::run() {
if (UserParent != BB) {
// See if the user is one of our successors that has only one
// predecessor, so that we don't have to split the critical edge.
- if (UserParent->getUniquePredecessor() == BB) {
+ bool ShouldSink = UserParent->getUniquePredecessor() == BB;
+ // Another option where we can sink is a block that ends with a
+ // terminator that does not pass control to other block (such as
+ // return or unreachable). In this case:
+ // - I dominates the User (by SSA form);
+ // - the User will be executed at most once.
+ // So sinking I down to User is always profitable or neutral.
+ if (!ShouldSink) {
+ auto *Term = UserParent->getTerminator();
+ ShouldSink = isa<ReturnInst>(Term) || isa<UnreachableInst>(Term);
+ }
+ if (ShouldSink) {
+ assert(DT.dominates(BB, UserParent) &&
+ "Dominance relation broken?");
// Okay, the CFG is simple enough, try to sink this instruction.
if (TryToSinkInstruction(I, UserParent)) {
LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
diff --git a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
index d29bcc7d4172..53c82702c9eb 100644
--- a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
@@ -203,7 +203,6 @@ define double @pr26354(<2 x double>* %tmp, i1 %B) {
; CHECK-LABEL: @pr26354(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* [[TMP:%.*]], align 16
-; CHECK-NEXT: [[E1:%.*]] = extractelement <2 x double> [[LD]], i32 0
; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[END:%.*]]
; CHECK: if:
; CHECK-NEXT: [[E2:%.*]] = extractelement <2 x double> [[LD]], i32 1
@@ -211,6 +210,7 @@ define double @pr26354(<2 x double>* %tmp, i1 %B) {
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[PH:%.*]] = phi <4 x double> [ undef, [[ENTRY:%.*]] ], [ [[I1]], [[IF]] ]
+; CHECK-NEXT: [[E1:%.*]] = extractelement <2 x double> [[LD]], i32 0
; CHECK-NEXT: [[E3:%.*]] = extractelement <4 x double> [[PH]], i32 1
; CHECK-NEXT: [[MU:%.*]] = fmul double [[E1]], [[E3]]
; CHECK-NEXT: ret double [[MU]]
diff --git a/llvm/test/Transforms/InstCombine/overflow.ll b/llvm/test/Transforms/InstCombine/overflow.ll
index f5558890d138..6205a02776cc 100644
--- a/llvm/test/Transforms/InstCombine/overflow.ll
+++ b/llvm/test/Transforms/InstCombine/overflow.ll
@@ -8,13 +8,13 @@ define i32 @test1(i32 %a, i32 %b) nounwind ssp {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SADD:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[B:%.*]], i32 [[A:%.*]])
-; CHECK-NEXT: [[SADD_RESULT:%.*]] = extractvalue { i32, i1 } [[SADD]], 0
; CHECK-NEXT: [[TMP0:%.*]] = extractvalue { i32, i1 } [[SADD]], 1
; CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK: if.then:
; CHECK-NEXT: tail call void @throwAnExceptionOrWhatever() #2
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
+; CHECK-NEXT: [[SADD_RESULT:%.*]] = extractvalue { i32, i1 } [[SADD]], 0
; CHECK-NEXT: ret i32 [[SADD_RESULT]]
;
entry:
diff --git a/llvm/test/Transforms/InstCombine/sink_to_unreachable.ll b/llvm/test/Transforms/InstCombine/sink_to_unreachable.ll
index 600e9388474c..1adcd2618a25 100644
--- a/llvm/test/Transforms/InstCombine/sink_to_unreachable.ll
+++ b/llvm/test/Transforms/InstCombine/sink_to_unreachable.ll
@@ -3,6 +3,7 @@
; RUN: opt -passes=instcombine -S < %s | FileCheck %s
declare void @use(i32 %x)
+declare i1 @cond()
define void @test_01(i32 %x, i32 %y) {
; CHECK-LABEL: @test_01(
@@ -33,20 +34,18 @@ exit:
}
-; TODO: %comparator and %signed can be sunk down to unreachable just as in
-; test above.
define void @test_02(i32 %x, i32 %y) {
; CHECK-LABEL: @test_02(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[X]], [[Y]]
-; CHECK-NEXT: [[SIGNED:%.*]] = select i1 [[C2]], i32 -1, i32 1
-; CHECK-NEXT: [[COMPARATOR:%.*]] = select i1 [[C1]], i32 0, i32 [[SIGNED]]
+; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: br i1 [[C2]], label [[EXIT:%.*]], label [[MEDIUM:%.*]]
; CHECK: medium:
; CHECK-NEXT: [[C3:%.*]] = icmp sgt i32 [[X]], [[Y]]
; CHECK-NEXT: br i1 [[C3]], label [[EXIT]], label [[UNREACHED:%.*]]
; CHECK: unreached:
+; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[X]], [[Y]]
+; CHECK-NEXT: [[SIGNED:%.*]] = select i1 [[C2]], i32 -1, i32 1
+; CHECK-NEXT: [[COMPARATOR:%.*]] = select i1 [[C1]], i32 0, i32 [[SIGNED]]
; CHECK-NEXT: call void @use(i32 [[COMPARATOR]])
; CHECK-NEXT: unreachable
; CHECK: exit:
@@ -70,3 +69,92 @@ unreached:
exit:
ret void
}
+
+define i32 @test_03(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_03(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C2]], label [[EXIT:%.*]], label [[MEDIUM:%.*]]
+; CHECK: medium:
+; CHECK-NEXT: [[C3:%.*]] = icmp sgt i32 [[X]], [[Y]]
+; CHECK-NEXT: br i1 [[C3]], label [[EXIT]], label [[UNREACHED:%.*]]
+; CHECK: unreached:
+; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[X]], [[Y]]
+; CHECK-NEXT: [[SIGNED:%.*]] = select i1 [[C2]], i32 -1, i32 1
+; CHECK-NEXT: [[COMPARATOR:%.*]] = select i1 [[C1]], i32 0, i32 [[SIGNED]]
+; CHECK-NEXT: ret i32 [[COMPARATOR]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %c1 = icmp eq i32 %x, %y
+ %c2 = icmp slt i32 %x, %y
+ %signed = select i1 %c2, i32 -1, i32 1
+ %comparator = select i1 %c1, i32 0, i32 %signed
+ br i1 %c2, label %exit, label %medium
+
+medium:
+ %c3 = icmp sgt i32 %x, %y
+ br i1 %c3, label %exit, label %unreached
+
+unreached:
+ ret i32 %comparator
+
+exit:
+ ret i32 0
+}
+
+define i32 @test_04(i32 %x, i1 %c) {
+; CHECK-LABEL: @test_04(
+; CHECK-NEXT: bb0:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[BB1]] ], [ 1, [[BB2]] ]
+; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = add i32 [[P]], [[A]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+bb0:
+ %a = add i32 %x, 1
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ br label %bb3
+bb2:
+ br label %bb3
+bb3:
+ %p = phi i32 [0, %bb1], [1, %bb2]
+ %r = add i32 %p, %a
+ ret i32 %r
+}
+
+; Do not sink into a potentially hotter block.
+define i32 @test_05_neg(i32 %x, i1 %cond) {
+; CHECK-LABEL: @test_05_neg(
+; CHECK-NEXT: bb0:
+; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], 1
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: [[CALL:%.*]] = call i1 @cond()
+; CHECK-NEXT: br i1 [[CALL]], label [[BB2]], label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[BB1]] ], [ [[A]], [[BB2]] ]
+; CHECK-NEXT: ret i32 [[P]]
+;
+bb0:
+ %a = add i32 %x, 1
+ br i1 %cond, label %bb1, label %bb2
+bb1:
+ br label %bb3
+bb2:
+ %call = call i1 @cond()
+ br i1 %call, label %bb2, label %bb3
+bb3:
+ %p = phi i32 [0, %bb1], [%a, %bb2]
+ ret i32 %p
+}
diff --git a/llvm/test/Transforms/PGOProfile/chr.ll b/llvm/test/Transforms/PGOProfile/chr.ll
index 5ab6d97b8ca5..9bedb87834f6 100644
--- a/llvm/test/Transforms/PGOProfile/chr.ll
+++ b/llvm/test/Transforms/PGOProfile/chr.ll
@@ -796,10 +796,6 @@ define i32 @test_chr_7_1(i32* %i, i32* %j, i32 %sum0) !prof !14 {
; CHECK-LABEL: @test_chr_7_1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
-; CHECK-NEXT: [[V3:%.*]] = and i32 [[I0]], 2
-; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V3]], 0
-; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
-; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[J0]], 12
@@ -824,6 +820,10 @@ define i32 @test_chr_7_1(i32* %i, i32* %j, i32 %sum0) !prof !14 {
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
+; CHECK-NEXT: [[V3:%.*]] = and i32 [[I0]], 2
+; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V3]], 0
+; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
+; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16
; CHECK-NEXT: ret i32 [[SUM2]]
;
entry:
@@ -1381,8 +1381,6 @@ define i32 @test_chr_15(i32* %i, i32* %j, i32 %sum0, i1 %pred, i32 %z) !prof !14
; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V6]], [[J0]]
; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16
-; CHECK-NEXT: [[V5:%.*]] = icmp eq i32 [[I0]], [[SUM2]]
-; CHECK-NEXT: [[SUM3:%.*]] = select i1 [[V5]], i32 [[SUM2]], i32 [[V8]], !prof !16
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: [[V9:%.*]] = and i32 [[I0]], 4
; CHECK-NEXT: [[V10:%.*]] = icmp eq i32 [[V9]], 0
@@ -1391,6 +1389,8 @@ define i32 @test_chr_15(i32* %i, i32* %j, i32 %sum0, i1 %pred, i32 %z) !prof !14
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
+; CHECK-NEXT: [[V5:%.*]] = icmp eq i32 [[I0]], [[SUM2]]
+; CHECK-NEXT: [[SUM3:%.*]] = select i1 [[V5]], i32 [[SUM2]], i32 [[V8]], !prof !16
; CHECK-NEXT: [[V11:%.*]] = add i32 [[I0]], [[SUM3]]
; CHECK-NEXT: ret i32 [[V11]]
;
diff --git a/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll b/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll
index e9a3e608ea29..3439830d6265 100644
--- a/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll
+++ b/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll
@@ -273,7 +273,6 @@ define i32 @test_diamond_simple(i32* %p, i32* %q, i32 %a, i32 %b) {
; CHECK-NEXT: [[X1:%.*]] = icmp eq i32 [[A:%.*]], 0
; CHECK-NEXT: [[Z2:%.*]] = select i1 [[X1]], i32 [[B:%.*]], i32 0
; CHECK-NEXT: [[X2:%.*]] = icmp eq i32 [[B]], 0
-; CHECK-NEXT: [[Z4:%.*]] = select i1 [[X2]], i32 [[Z2]], i32 3
; CHECK-NEXT: [[TMP0:%.*]] = or i32 [[A]], [[B]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT: br i1 [[TMP1]], label [[TMP3:%.*]], label [[TMP2:%.*]]
@@ -282,6 +281,7 @@ define i32 @test_diamond_simple(i32* %p, i32* %q, i32 %a, i32 %b) {
; CHECK-NEXT: store i32 [[SIMPLIFYCFG_MERGE]], i32* [[P:%.*]], align 4
; CHECK-NEXT: br label [[TMP3]]
; CHECK: 3:
+; CHECK-NEXT: [[Z4:%.*]] = select i1 [[X2]], i32 [[Z2]], i32 3
; CHECK-NEXT: ret i32 [[Z4]]
;
entry:
More information about the llvm-commits
mailing list