[llvm] r304375 - [PartialInlining] Reduce outlining overhead by removing unneeded live-out(s)
Xinliang David Li via llvm-commits
llvm-commits at lists.llvm.org
Wed May 31 17:12:41 PDT 2017
Author: davidxl
Date: Wed May 31 19:12:41 2017
New Revision: 304375
URL: http://llvm.org/viewvc/llvm-project?rev=304375&view=rev
Log:
[PartialInlining] Reduce outlining overhead by removing unneeded live-out(s)
Differential Revision: http://reviews.llvm.org/D33694
Added:
llvm/trunk/test/Transforms/CodeExtractor/PartialInlineLiveAcross.ll
llvm/trunk/test/Transforms/CodeExtractor/PartialInlineNoLiveOut.ll
Modified:
llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp
Modified: llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp?rev=304375&r1=304374&r2=304375&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp Wed May 31 19:12:41 2017
@@ -652,12 +652,21 @@ Function *PartialInlinerImpl::unswitchFu
// only split block when necessary:
PHINode *FirstPhi = getFirstPHI(PreReturn);
unsigned NumPredsFromEntries = OI->ReturnBlockPreds.size();
+ auto IsTrivialPhi = [](PHINode *PN) -> Value * {
+ Value *CommonValue = PN->getIncomingValue(0);
+ if (all_of(PN->incoming_values(),
+ [&](Value *V) { return V == CommonValue; }))
+ return CommonValue;
+ return nullptr;
+ };
+
if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) {
NewReturnBlock = NewReturnBlock->splitBasicBlock(
NewReturnBlock->getFirstNonPHI()->getIterator());
BasicBlock::iterator I = PreReturn->begin();
Instruction *Ins = &NewReturnBlock->front();
+ SmallVector<Instruction *, 4> DeadPhis;
while (I != PreReturn->end()) {
PHINode *OldPhi = dyn_cast<PHINode>(I);
if (!OldPhi)
@@ -674,8 +683,22 @@ Function *PartialInlinerImpl::unswitchFu
RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE);
OldPhi->removeIncomingValue(NewE);
}
+
+ // After incoming values splitting, the old phi may become trivial.
+ // Keeping the trivial phi can introduce a definition inside the outlined
+ // region which is live-out, causing unnecessary overhead (load, store,
+ // arg passing, etc).
+ if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
+ OldPhi->replaceAllUsesWith(OldPhiVal);
+ DeadPhis.push_back(OldPhi);
+ }
+
++I;
}
+
+ for (auto *DP : DeadPhis)
+ DP->eraseFromParent();
+
for (auto E : OI->ReturnBlockPreds) {
BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);
Added: llvm/trunk/test/Transforms/CodeExtractor/PartialInlineLiveAcross.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeExtractor/PartialInlineLiveAcross.ll?rev=304375&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/PartialInlineLiveAcross.ll (added)
+++ llvm/trunk/test/Transforms/CodeExtractor/PartialInlineLiveAcross.ll Wed May 31 19:12:41 2017
@@ -0,0 +1,61 @@
+; RUN: opt -S -partial-inliner -max-num-inline-blocks=2 -skip-partial-inlining-cost-analysis < %s | FileCheck %s
+; RUN: opt -S -passes=partial-inliner -max-num-inline-blocks=2 -skip-partial-inlining-cost-analysis < %s | FileCheck %s
+define i32 @test(i32 %arg) local_unnamed_addr #0 {
+bb:
+ %tmp = tail call i32 (...) @bar() #1
+ %tmp1 = icmp slt i32 %arg, 0
+ br i1 %tmp1, label %bb6, label %bb2
+
+bb2: ; preds = %bb
+ tail call void (...) @foo() #1
+ tail call void (...) @foo() #1
+ tail call void (...) @foo() #1
+ tail call void (...) @foo() #1
+ tail call void (...) @foo() #1
+ %tmp3 = tail call i32 (...) @bar() #1
+ %tmp4 = icmp eq i32 %tmp3, 10
+ br i1 %tmp4, label %bb6, label %bb5
+
+bb5: ; preds = %bb2
+ tail call void (...) @foo() #1
+ tail call void (...) @foo() #1
+ tail call void (...) @foo() #1
+ tail call void (...) @foo() #1
+ br label %bb6
+
+bb6: ; preds = %bb5, %bb2, %bb
+ %tmp7 = phi i32 [ %tmp, %bb5 ], [ 0, %bb ], [ %tmp, %bb2 ]
+ ret i32 %tmp7
+}
+
+declare i32 @bar(...) local_unnamed_addr #1
+
+declare void @foo(...) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable
+define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 {
+; CHECK-LABEL: @dummy_caller
+; CHECK: codeRepl.i:
+; CHECK: call void @test.1_bb2()
+; CHECK-NOT: load
+; CHECK: br
+
+bb:
+ %tmp = tail call i32 @test(i32 %arg)
+ ret i32 %tmp
+}
+
+; CHECK-LABEL: define internal void @test.1_bb2()
+; CHECK: .exitStub:
+; CHECK-NOT: store i32 %tmp7, i32* %tmp7.out
+; CHECK: ret
+
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind uwtable }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 303574)"}
Added: llvm/trunk/test/Transforms/CodeExtractor/PartialInlineNoLiveOut.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeExtractor/PartialInlineNoLiveOut.ll?rev=304375&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/PartialInlineNoLiveOut.ll (added)
+++ llvm/trunk/test/Transforms/CodeExtractor/PartialInlineNoLiveOut.ll Wed May 31 19:12:41 2017
@@ -0,0 +1,62 @@
+; RUN: opt -S -partial-inliner -max-num-inline-blocks=2 -skip-partial-inlining-cost-analysis < %s | FileCheck %s
+; RUN: opt -S -passes=partial-inliner -max-num-inline-blocks=2 -skip-partial-inlining-cost-analysis < %s | FileCheck %s
+
+define i32 @test(i32 %arg) local_unnamed_addr #0 {
+bb:
+ %tmp = tail call i32 (...) @bar() #1
+ %tmp1 = icmp slt i32 %arg, 0
+ br i1 %tmp1, label %bb6, label %bb2
+
+bb2: ; preds = %bb
+ tail call void (...) @foo() #1
+ tail call void (...) @foo() #1
+ tail call void (...) @foo() #1
+ tail call void (...) @foo() #1
+ tail call void (...) @foo() #1
+ %tmp3 = tail call i32 (...) @bar() #1
+ %tmp4 = icmp eq i32 %tmp3, 10
+ br i1 %tmp4, label %bb6, label %bb5
+
+bb5: ; preds = %bb2
+ tail call void (...) @foo() #1
+ tail call void (...) @foo() #1
+ tail call void (...) @foo() #1
+ tail call void (...) @foo() #1
+ br label %bb6
+
+bb6: ; preds = %bb5, %bb2, %bb
+ %tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ], [ 1, %bb2 ]
+ ret i32 %tmp7
+}
+
+; Function Attrs: nounwind uwtable
+declare i32 @bar(...) local_unnamed_addr #0
+
+; Function Attrs: nounwind uwtable
+declare void @foo(...) local_unnamed_addr #0
+
+; Function Attrs: nounwind uwtable
+define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 {
+; CHECK-LABEL: @dummy_caller
+; CHECK: codeRepl.i:
+; CHECK: call void @test.1_bb2()
+; CHECK-NOT: load
+; CHECK: br
+bb:
+ %tmp = tail call i32 @test(i32 %arg)
+ ret i32 %tmp
+}
+
+; CHECK-LABEL: define internal void @test.1_bb2()
+; CHECK: .exitStub:
+; CHECK-NOT: store i32 %tmp7, i32* %tmp7.out
+; CHECK: ret
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 303574)"}
More information about the llvm-commits
mailing list