[llvm] r304245 - [PartialInlining] Shrinkwrap allocas with live range contained in outline region.

Xinliang David Li via llvm-commits llvm-commits at lists.llvm.org
Tue May 30 14:22:18 PDT 2017


Author: davidxl
Date: Tue May 30 16:22:18 2017
New Revision: 304245

URL: http://llvm.org/viewvc/llvm-project?rev=304245&view=rev
Log:
[PartialInlining] Shrinkwrap allocas with live range contained in outline region.

Differential Revision: http://reviews.llvm.org/D33618

Added:
    llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca.ll
    llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca2.ll
    llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll
    llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll
Modified:
    llvm/trunk/include/llvm/Transforms/Utils/CodeExtractor.h
    llvm/trunk/lib/Transforms/Utils/CodeExtractor.cpp

Modified: llvm/trunk/include/llvm/Transforms/Utils/CodeExtractor.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/CodeExtractor.h?rev=304245&r1=304244&r2=304245&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/CodeExtractor.h (original)
+++ llvm/trunk/include/llvm/Transforms/Utils/CodeExtractor.h Tue May 30 16:22:18 2017
@@ -25,6 +25,7 @@ template <typename T> class ArrayRef;
   class BranchProbabilityInfo;
   class DominatorTree;
   class Function;
+  class Instruction;
   class Loop;
   class Module;
   class RegionNode;
@@ -103,7 +104,17 @@ template <typename T> class ArrayRef;
     /// a code sequence, that sequence is modified, including changing these
     /// sets, before extraction occurs. These modifications won't have any
     /// significant impact on the cost however.
-    void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs) const;
+    void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
+                           const ValueSet &Allocas) const;
+    /// Find the set of allocas whose life ranges are contained within the
+    /// outlined region.
+    ///
+    /// Allocas which have life_time markers contained in the outlined region
+    /// should be pushed to the outlined function. The address bitcasts that
+    /// are used by the lifetime markers are also candidates for shrink-
+    /// wrapping. The instructions that need to be sinked are collected in
+    /// 'Allocas'.
+    void findAllocas(ValueSet &Allocas) const;
 
   private:
     void severSplitPHINodes(BasicBlock *&Header);

Modified: llvm/trunk/lib/Transforms/Utils/CodeExtractor.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/CodeExtractor.cpp?rev=304245&r1=304244&r2=304245&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/CodeExtractor.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/CodeExtractor.cpp Tue May 30 16:22:18 2017
@@ -27,6 +27,7 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/MDBuilder.h"
@@ -141,16 +142,77 @@ static bool definedInCaller(const SetVec
   return false;
 }
 
-void CodeExtractor::findInputsOutputs(ValueSet &Inputs,
-                                      ValueSet &Outputs) const {
+void CodeExtractor::findAllocas(ValueSet &SinkCands) const {
+  Function *Func = (*Blocks.begin())->getParent();
+  for (BasicBlock &BB : *Func) {
+    if (Blocks.count(&BB))
+      continue;
+    for (Instruction &II : BB) {
+      auto *AI = dyn_cast<AllocaInst>(&II);
+      if (!AI)
+        continue;
+
+      // Returns true if matching life time markers are found within
+      // the outlined region.
+      auto GetLifeTimeMarkers = [&](Instruction *Addr) {
+        Instruction *LifeStart = nullptr, *LifeEnd = nullptr;
+        for (User *U : Addr->users()) {
+          if (!definedInRegion(Blocks, U))
+            return false;
+
+          IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
+          if (IntrInst) {
+            if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start)
+              LifeStart = IntrInst;
+            if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
+              LifeEnd = IntrInst;
+          }
+        }
+        return LifeStart && LifeEnd;
+      };
+
+      if (GetLifeTimeMarkers(AI)) {
+        SinkCands.insert(AI);
+        continue;
+      }
+
+      // Follow the bitcast:
+      Instruction *MarkerAddr = nullptr;
+      for (User *U : AI->users()) {
+        if (U->stripPointerCasts() == AI) {
+          Instruction *Bitcast = cast<Instruction>(U);
+          if (GetLifeTimeMarkers(Bitcast)) {
+            MarkerAddr = Bitcast;
+            continue;
+          }
+        }
+        if (!definedInRegion(Blocks, U)) {
+          MarkerAddr = nullptr;
+          break;
+        }
+      }
+      if (MarkerAddr) {
+        if (!definedInRegion(Blocks, MarkerAddr))
+          SinkCands.insert(MarkerAddr);
+        SinkCands.insert(AI);
+      }
+    }
+  }
+}
+
+void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
+                                      const ValueSet &SinkCands) const {
+
   for (BasicBlock *BB : Blocks) {
     // If a used value is defined outside the region, it's an input.  If an
     // instruction is used outside the region, it's an output.
     for (Instruction &II : *BB) {
       for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE;
-           ++OI)
-        if (definedInCaller(Blocks, *OI))
-          Inputs.insert(*OI);
+           ++OI) {
+        Value *V = *OI;
+        if (!SinkCands.count(V) && definedInCaller(Blocks, V))
+          Inputs.insert(V);
+      }
 
       for (User *U : II.users())
         if (!definedInRegion(Blocks, U)) {
@@ -718,7 +780,7 @@ Function *CodeExtractor::extractCodeRegi
   if (!isEligible())
     return nullptr;
 
-  ValueSet inputs, outputs;
+  ValueSet inputs, outputs, SinkingCands;
 
   // Assumption: this is a single-entry code region, and the header is the first
   // block in the region.
@@ -757,8 +819,15 @@ Function *CodeExtractor::extractCodeRegi
                                                "newFuncRoot");
   newFuncRoot->getInstList().push_back(BranchInst::Create(header));
 
+  findAllocas(SinkingCands);
+
   // Find inputs to, outputs from the code region.
-  findInputsOutputs(inputs, outputs);
+  findInputsOutputs(inputs, outputs, SinkingCands);
+
+  // Now sink all instructions which only have non-phi uses inside the region
+  for (auto *II : SinkingCands)
+    cast<Instruction>(II)->moveBefore(*newFuncRoot,
+                                      newFuncRoot->getFirstInsertionPt());
 
   // Calculate the exit blocks for the extracted region and the total exit
   //  weights for each of those blocks.

Added: llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca.ll?rev=304245&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca.ll (added)
+++ llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca.ll Tue May 30 16:22:18 2017
@@ -0,0 +1,68 @@
+
+;  RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck  %s
+ ; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck   %s
+
+%"class.base" = type { %"struct.base"* }
+%"struct.base" = type opaque
+
+@g = external local_unnamed_addr global i32, align 4
+
+; Function Attrs: nounwind uwtable
+define i32 @callee_sinkable_bitcast(i32 %arg) local_unnamed_addr #0 {
+; CHECK-LABEL:define{{.*}}@callee_sinkable_bitcast.{{[0-9]}}
+; CHECK: alloca
+; CHECK-NEXT: bitcast
+; CHECK: call void @llvm.lifetime
+bb:
+  %tmp = alloca  %"class.base", align 4
+  %tmp1 = bitcast %"class.base"* %tmp to i8*
+  %tmp2 = load i32, i32* @g, align 4, !tbaa !2
+  %tmp3 = add nsw i32 %tmp2, 1
+  %tmp4 = icmp slt i32 %arg, 0
+  br i1 %tmp4, label %bb6, label %bb5
+
+bb5:                                              ; preds = %bb
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1) #2
+  %tmp11 = bitcast %"class.base"* %tmp to i32*
+  store i32 %tmp3, i32* %tmp11, align 4, !tbaa !2
+  store i32 %tmp3, i32* @g, align 4, !tbaa !2
+  call void @bar(i32* nonnull %tmp11) #2
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1) #2
+  br label %bb6
+
+bb6:                                              ; preds = %bb5, %bb
+  %tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ]
+  ret i32 %tmp7
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+declare void @bar(i32*) local_unnamed_addr #2
+declare void @bar2(i32*, i32*) local_unnamed_addr #1
+
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+  %tmp = tail call i32 @callee_sinkable_bitcast(i32 %arg)
+  ret i32 %tmp
+}
+
+attributes #0 = { nounwind uwtable}
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 303574)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+
+

Added: llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca2.ll?rev=304245&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca2.ll (added)
+++ llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca2.ll Tue May 30 16:22:18 2017
@@ -0,0 +1,65 @@
+; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck  %s
+; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck   %s
+
+%"class.base" = type { %"struct.base"* }
+%"struct.base" = type opaque
+
+@g = external local_unnamed_addr global i32, align 4
+
+define i32 @callee_no_bitcast(i32 %arg) local_unnamed_addr #0 {
+; CHECK-LABEL:define{{.*}}@callee_no_bitcast.{{[0-9]}}
+; CHECK: alloca
+; CHECK: call void @llvm.lifetime
+bb:
+  %tmp = alloca i8, align 4
+  %tmp2 = load i32, i32* @g, align 4, !tbaa !2
+  %tmp3 = add nsw i32 %tmp2, 1
+  %tmp4 = icmp slt i32 %arg, 0
+  br i1 %tmp4, label %bb6, label %bb5
+
+bb5:                                              ; preds = %bb
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp) #2
+  store i32 %tmp3, i32* @g, align 4, !tbaa !2
+  %tmp11 = bitcast i8 * %tmp to i32*
+  call void @bar(i32* nonnull %tmp11) #2
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp) #2
+  br label %bb6
+
+bb6:                                              ; preds = %bb5, %bb
+  %tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ]
+  ret i32 %tmp7
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+declare void @bar(i32*) local_unnamed_addr #2
+declare void @bar2(i32*, i32*) local_unnamed_addr #1
+
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+  %tmp = tail call i32 @callee_no_bitcast(i32 %arg)
+  ret i32 %tmp
+}
+
+attributes #0 = { nounwind uwtable}
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 303574)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+
+
+

Added: llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll?rev=304245&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll (added)
+++ llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll Tue May 30 16:22:18 2017
@@ -0,0 +1,67 @@
+; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck  %s
+; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck   %s
+
+%"class.base" = type { %"struct.base"* }
+%"struct.base" = type opaque
+
+@g = external local_unnamed_addr global i32, align 4
+
+define i32 @callee_unknown_use1(i32 %arg) local_unnamed_addr #0 {
+; CHECK-LABEL:define{{.*}}@callee_unknown_use1.{{[0-9]}}
+; CHECK-NOT: alloca
+; CHECK: call void @llvm.lifetime
+bb:
+  %tmp = alloca  i8, align 4
+  %tmp2 = load i32, i32* @g, align 4, !tbaa !2
+  %tmp3 = add nsw i32 %tmp2, 1
+  %tmp4 = icmp slt i32 %arg, 0
+  br i1 %tmp4, label %bb6, label %bb5
+
+bb5:                                              ; preds = %bb
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp) #2
+  store i32 %tmp3, i32* @g, align 4, !tbaa !2
+  %tmp11 = bitcast i8* %tmp to i32*
+  call void @bar(i32* nonnull %tmp11) #2
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp) #2
+  br label %bb6
+
+bb6:                                              ; preds = %bb5, %bb
+  %tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ]
+  %tmp1 = bitcast i8* %tmp to i32*
+  ret i32 %tmp7
+}
+
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+declare void @bar(i32*) local_unnamed_addr #2
+declare void @bar2(i32*, i32*) local_unnamed_addr #1
+
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+  %tmp = tail call i32 @callee_unknown_use1(i32 %arg)
+  ret i32 %tmp
+}
+
+attributes #0 = { nounwind uwtable}
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 303574)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+
+
+

Added: llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll?rev=304245&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll (added)
+++ llvm/trunk/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll Tue May 30 16:22:18 2017
@@ -0,0 +1,67 @@
+; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck  %s
+; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck   %s
+
+%"class.base" = type { %"struct.base"* }
+%"struct.base" = type opaque
+
+@g = external local_unnamed_addr global i32, align 4
+
+define i32 @callee_unknown_use2(i32 %arg) local_unnamed_addr #0 {
+; CHECK-LABEL:define{{.*}}@callee_unknown_use2.{{[0-9]}}
+; CHECK-NOT: alloca
+; CHECK: call void @llvm.lifetime
+bb:
+  %tmp = alloca i32, align 4
+  %tmp1 = bitcast i32* %tmp to i8*
+  %tmp2 = load i32, i32* @g, align 4, !tbaa !2
+  %tmp3 = add nsw i32 %tmp2, 1
+  %tmp4 = icmp slt i32 %arg, 0
+  br i1 %tmp4, label %bb6, label %bb5
+
+bb5:                                              ; preds = %bb
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1) #2
+  store i32 %tmp3, i32* %tmp, align 4, !tbaa !2
+  store i32 %tmp3, i32* @g, align 4, !tbaa !2
+  call void @bar(i32* nonnull %tmp) #2
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1) #2
+  br label %bb6
+
+bb6:                                              ; preds = %bb5, %bb
+  %tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ]
+  %tmp10 = bitcast i8* %tmp1 to i32*
+  ret i32 %tmp7
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+declare void @bar(i32*) local_unnamed_addr #2
+declare void @bar2(i32*, i32*) local_unnamed_addr #1
+
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+  %tmp = tail call i32 @callee_unknown_use2(i32 %arg)
+  ret i32 %tmp
+}
+
+attributes #0 = { nounwind uwtable}
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 303574)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+
+
+




More information about the llvm-commits mailing list