[llvm] r304245 - [PartialInlining] Shrinkwrap allocas with live range contained in outline region.
Xinliang David Li via llvm-commits
llvm-commits at lists.llvm.org
Tue May 30 14:22:18 PDT 2017
Author: davidxl
Date: Tue May 30 16:22:18 2017
New Revision: 304245
URL: http://llvm.org/viewvc/llvm-project?rev=304245&view=rev
Log:
[PartialInlining] Shrinkwrap allocas with live range contained in outline region.
Differential Revision: http://reviews.llvm.org/D33618
Added:
llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca.ll
llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca2.ll
llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll
llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll
Modified:
llvm/trunk/include/llvm/Transforms/Utils/CodeExtractor.h
llvm/trunk/lib/Transforms/Utils/CodeExtractor.cpp
Modified: llvm/trunk/include/llvm/Transforms/Utils/CodeExtractor.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/CodeExtractor.h?rev=304245&r1=304244&r2=304245&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/CodeExtractor.h (original)
+++ llvm/trunk/include/llvm/Transforms/Utils/CodeExtractor.h Tue May 30 16:22:18 2017
@@ -25,6 +25,7 @@ template <typename T> class ArrayRef;
class BranchProbabilityInfo;
class DominatorTree;
class Function;
+ class Instruction;
class Loop;
class Module;
class RegionNode;
@@ -103,7 +104,17 @@ template <typename T> class ArrayRef;
/// a code sequence, that sequence is modified, including changing these
/// sets, before extraction occurs. These modifications won't have any
/// significant impact on the cost however.
- void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs) const;
+ void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
+ const ValueSet &Allocas) const;
+ /// Find the set of allocas whose live ranges are contained within the
+ /// outlined region.
+ ///
+ /// Allocas which have lifetime markers contained in the outlined region
+ /// should be pushed to the outlined function. The address bitcasts that
+ /// are used by the lifetime markers are also candidates for shrink-
+ /// wrapping. The instructions that need to be sunk are collected in
+ /// 'Allocas'.
+ void findAllocas(ValueSet &Allocas) const;
private:
void severSplitPHINodes(BasicBlock *&Header);
Modified: llvm/trunk/lib/Transforms/Utils/CodeExtractor.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/CodeExtractor.cpp?rev=304245&r1=304244&r2=304245&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/CodeExtractor.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/CodeExtractor.cpp Tue May 30 16:22:18 2017
@@ -27,6 +27,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
@@ -141,16 +142,77 @@ static bool definedInCaller(const SetVec
return false;
}
-void CodeExtractor::findInputsOutputs(ValueSet &Inputs,
- ValueSet &Outputs) const {
+void CodeExtractor::findAllocas(ValueSet &SinkCands) const {
+ Function *Func = (*Blocks.begin())->getParent();
+ for (BasicBlock &BB : *Func) {
+ if (Blocks.count(&BB))
+ continue;
+ for (Instruction &II : BB) {
+ auto *AI = dyn_cast<AllocaInst>(&II);
+ if (!AI)
+ continue;
+
+ // Returns true if every user of Addr is inside the outlined region
+ // and both a lifetime.start and a lifetime.end marker are among them.
+ auto GetLifeTimeMarkers = [&](Instruction *Addr) {
+ Instruction *LifeStart = nullptr, *LifeEnd = nullptr;
+ for (User *U : Addr->users()) {
+ if (!definedInRegion(Blocks, U))
+ return false;
+
+ IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
+ if (IntrInst) {
+ if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start)
+ LifeStart = IntrInst;
+ if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
+ LifeEnd = IntrInst;
+ }
+ }
+ return LifeStart && LifeEnd;
+ };
+
+ if (GetLifeTimeMarkers(AI)) {
+ SinkCands.insert(AI);
+ continue;
+ }
+
+ // Otherwise, look for the lifetime markers on a pointer cast of the alloca:
+ Instruction *MarkerAddr = nullptr;
+ for (User *U : AI->users()) {
+ if (U->stripPointerCasts() == AI) {
+ Instruction *Bitcast = cast<Instruction>(U);
+ if (GetLifeTimeMarkers(Bitcast)) {
+ MarkerAddr = Bitcast;
+ continue;
+ }
+ }
+ if (!definedInRegion(Blocks, U)) {
+ MarkerAddr = nullptr;
+ break;
+ }
+ }
+ if (MarkerAddr) {
+ if (!definedInRegion(Blocks, MarkerAddr))
+ SinkCands.insert(MarkerAddr);
+ SinkCands.insert(AI);
+ }
+ }
+ }
+}
+
+void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
+ const ValueSet &SinkCands) const {
+
for (BasicBlock *BB : Blocks) {
// If a used value is defined outside the region, it's an input. If an
// instruction is used outside the region, it's an output.
for (Instruction &II : *BB) {
for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE;
- ++OI)
- if (definedInCaller(Blocks, *OI))
- Inputs.insert(*OI);
+ ++OI) {
+ Value *V = *OI;
+ if (!SinkCands.count(V) && definedInCaller(Blocks, V))
+ Inputs.insert(V);
+ }
for (User *U : II.users())
if (!definedInRegion(Blocks, U)) {
@@ -718,7 +780,7 @@ Function *CodeExtractor::extractCodeRegi
if (!isEligible())
return nullptr;
- ValueSet inputs, outputs;
+ ValueSet inputs, outputs, SinkingCands;
// Assumption: this is a single-entry code region, and the header is the first
// block in the region.
@@ -757,8 +819,15 @@ Function *CodeExtractor::extractCodeRegi
"newFuncRoot");
newFuncRoot->getInstList().push_back(BranchInst::Create(header));
+ findAllocas(SinkingCands);
+
// Find inputs to, outputs from the code region.
- findInputsOutputs(inputs, outputs);
+ findInputsOutputs(inputs, outputs, SinkingCands);
+
+ // Now sink all instructions which only have non-phi uses inside the region
+ for (auto *II : SinkingCands)
+ cast<Instruction>(II)->moveBefore(*newFuncRoot,
+ newFuncRoot->getFirstInsertionPt());
// Calculate the exit blocks for the extracted region and the total exit
// weights for each of those blocks.
Added: llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca.ll?rev=304245&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca.ll (added)
+++ llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca.ll Tue May 30 16:22:18 2017
@@ -0,0 +1,68 @@
+
+; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+ ; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+
+%"class.base" = type { %"struct.base"* }
+%"struct.base" = type opaque
+
+@g = external local_unnamed_addr global i32, align 4
+
+; Function Attrs: nounwind uwtable
+define i32 @callee_sinkable_bitcast(i32 %arg) local_unnamed_addr #0 {
+; CHECK-LABEL:define{{.*}}@callee_sinkable_bitcast.{{[0-9]}}
+; CHECK: alloca
+; CHECK-NEXT: bitcast
+; CHECK: call void @llvm.lifetime
+bb:
+ %tmp = alloca %"class.base", align 4
+ %tmp1 = bitcast %"class.base"* %tmp to i8*
+ %tmp2 = load i32, i32* @g, align 4, !tbaa !2
+ %tmp3 = add nsw i32 %tmp2, 1
+ %tmp4 = icmp slt i32 %arg, 0
+ br i1 %tmp4, label %bb6, label %bb5
+
+bb5: ; preds = %bb
+ call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1) #2
+ %tmp11 = bitcast %"class.base"* %tmp to i32*
+ store i32 %tmp3, i32* %tmp11, align 4, !tbaa !2
+ store i32 %tmp3, i32* @g, align 4, !tbaa !2
+ call void @bar(i32* nonnull %tmp11) #2
+ call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1) #2
+ br label %bb6
+
+bb6: ; preds = %bb5, %bb
+ %tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ]
+ ret i32 %tmp7
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+declare void @bar(i32*) local_unnamed_addr #2
+declare void @bar2(i32*, i32*) local_unnamed_addr #1
+
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+ %tmp = tail call i32 @callee_sinkable_bitcast(i32 %arg)
+ ret i32 %tmp
+}
+
+attributes #0 = { nounwind uwtable}
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 303574)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+
+
Added: llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca2.ll?rev=304245&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca2.ll (added)
+++ llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca2.ll Tue May 30 16:22:18 2017
@@ -0,0 +1,65 @@
+; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+
+%"class.base" = type { %"struct.base"* }
+%"struct.base" = type opaque
+
+@g = external local_unnamed_addr global i32, align 4
+
+define i32 @callee_no_bitcast(i32 %arg) local_unnamed_addr #0 {
+; CHECK-LABEL:define{{.*}}@callee_no_bitcast.{{[0-9]}}
+; CHECK: alloca
+; CHECK: call void @llvm.lifetime
+bb:
+ %tmp = alloca i8, align 4
+ %tmp2 = load i32, i32* @g, align 4, !tbaa !2
+ %tmp3 = add nsw i32 %tmp2, 1
+ %tmp4 = icmp slt i32 %arg, 0
+ br i1 %tmp4, label %bb6, label %bb5
+
+bb5: ; preds = %bb
+ call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp) #2
+ store i32 %tmp3, i32* @g, align 4, !tbaa !2
+ %tmp11 = bitcast i8 * %tmp to i32*
+ call void @bar(i32* nonnull %tmp11) #2
+ call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp) #2
+ br label %bb6
+
+bb6: ; preds = %bb5, %bb
+ %tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ]
+ ret i32 %tmp7
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+declare void @bar(i32*) local_unnamed_addr #2
+declare void @bar2(i32*, i32*) local_unnamed_addr #1
+
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+ %tmp = tail call i32 @callee_no_bitcast(i32 %arg)
+ ret i32 %tmp
+}
+
+attributes #0 = { nounwind uwtable}
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 303574)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+
+
+
Added: llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll?rev=304245&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll (added)
+++ llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll Tue May 30 16:22:18 2017
@@ -0,0 +1,67 @@
+; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+
+%"class.base" = type { %"struct.base"* }
+%"struct.base" = type opaque
+
+@g = external local_unnamed_addr global i32, align 4
+
+define i32 @callee_unknown_use1(i32 %arg) local_unnamed_addr #0 {
+; CHECK-LABEL:define{{.*}}@callee_unknown_use1.{{[0-9]}}
+; CHECK-NOT: alloca
+; CHECK: call void @llvm.lifetime
+bb:
+ %tmp = alloca i8, align 4
+ %tmp2 = load i32, i32* @g, align 4, !tbaa !2
+ %tmp3 = add nsw i32 %tmp2, 1
+ %tmp4 = icmp slt i32 %arg, 0
+ br i1 %tmp4, label %bb6, label %bb5
+
+bb5: ; preds = %bb
+ call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp) #2
+ store i32 %tmp3, i32* @g, align 4, !tbaa !2
+ %tmp11 = bitcast i8* %tmp to i32*
+ call void @bar(i32* nonnull %tmp11) #2
+ call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp) #2
+ br label %bb6
+
+bb6: ; preds = %bb5, %bb
+ %tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ]
+ %tmp1 = bitcast i8* %tmp to i32*
+ ret i32 %tmp7
+}
+
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+declare void @bar(i32*) local_unnamed_addr #2
+declare void @bar2(i32*, i32*) local_unnamed_addr #1
+
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+ %tmp = tail call i32 @callee_unknown_use1(i32 %arg)
+ ret i32 %tmp
+}
+
+attributes #0 = { nounwind uwtable}
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 303574)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+
+
+
Added: llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll?rev=304245&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll (added)
+++ llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll Tue May 30 16:22:18 2017
@@ -0,0 +1,67 @@
+; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+
+%"class.base" = type { %"struct.base"* }
+%"struct.base" = type opaque
+
+@g = external local_unnamed_addr global i32, align 4
+
+define i32 @callee_unknown_use2(i32 %arg) local_unnamed_addr #0 {
+; CHECK-LABEL:define{{.*}}@callee_unknown_use2.{{[0-9]}}
+; CHECK-NOT: alloca
+; CHECK: call void @llvm.lifetime
+bb:
+ %tmp = alloca i32, align 4
+ %tmp1 = bitcast i32* %tmp to i8*
+ %tmp2 = load i32, i32* @g, align 4, !tbaa !2
+ %tmp3 = add nsw i32 %tmp2, 1
+ %tmp4 = icmp slt i32 %arg, 0
+ br i1 %tmp4, label %bb6, label %bb5
+
+bb5: ; preds = %bb
+ call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1) #2
+ store i32 %tmp3, i32* %tmp, align 4, !tbaa !2
+ store i32 %tmp3, i32* @g, align 4, !tbaa !2
+ call void @bar(i32* nonnull %tmp) #2
+ call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1) #2
+ br label %bb6
+
+bb6: ; preds = %bb5, %bb
+ %tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ]
+ %tmp10 = bitcast i8* %tmp1 to i32*
+ ret i32 %tmp7
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+declare void @bar(i32*) local_unnamed_addr #2
+declare void @bar2(i32*, i32*) local_unnamed_addr #1
+
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+ %tmp = tail call i32 @callee_unknown_use2(i32 %arg)
+ ret i32 %tmp
+}
+
+attributes #0 = { nounwind uwtable}
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 303574)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+
+
+
More information about the llvm-commits
mailing list