[llvm] 01e2b39 - [Partial Inliner] Compute intrinsic cost through TTI

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 16 07:17:00 PDT 2020


Author: Dangeti Tharun kumar
Date: 2020-09-16T15:12:31+01:00
New Revision: 01e2b394ee16502440dbbb5440502a1e2aaf1477

URL: https://github.com/llvm/llvm-project/commit/01e2b394ee16502440dbbb5440502a1e2aaf1477
DIFF: https://github.com/llvm/llvm-project/commit/01e2b394ee16502440dbbb5440502a1e2aaf1477.diff

LOG: [Partial Inliner] Compute intrinsic cost through TTI

https://bugs.llvm.org/show_bug.cgi?id=45932

The assertion assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost && "Outlined function cost should be no less than the outlined region") was getting triggered in computeBBInlineCost.

Intrinsics like "assume" are considered regular function calls while computing costs.
This patch enables computeBBInlineCost to query TTI for intrinsic call cost.

Reviewed By: fhahn

Differential Revision: https://reviews.llvm.org/D87132

Added: 
    llvm/test/Transforms/PartialInlining/intrinsic-call-cost.ll

Modified: 
    llvm/lib/Transforms/IPO/PartialInlining.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp
index e1dc036ae413..a185e964d1b6 100644
--- a/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -226,10 +226,13 @@ struct PartialInlinerImpl {
     // multi-region outlining.
     FunctionCloner(Function *F, FunctionOutliningInfo *OI,
                    OptimizationRemarkEmitter &ORE,
-                   function_ref<AssumptionCache *(Function &)> LookupAC);
+                   function_ref<AssumptionCache *(Function &)> LookupAC,
+                   function_ref<TargetTransformInfo &(Function &)> GetTTI);
     FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
                    OptimizationRemarkEmitter &ORE,
-                   function_ref<AssumptionCache *(Function &)> LookupAC);
+                   function_ref<AssumptionCache *(Function &)> LookupAC,
+                   function_ref<TargetTransformInfo &(Function &)> GetTTI);
+
     ~FunctionCloner();
 
     // Prepare for function outlining: making sure there is only
@@ -266,6 +269,7 @@ struct PartialInlinerImpl {
     std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
     OptimizationRemarkEmitter &ORE;
     function_ref<AssumptionCache *(Function &)> LookupAC;
+    function_ref<TargetTransformInfo &(Function &)> GetTTI;
   };
 
 private:
@@ -334,7 +338,7 @@ struct PartialInlinerImpl {
   // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
   // approximate both the size and runtime cost (Note that in the current
   // inline cost analysis, there is no clear distinction there either).
-  static int computeBBInlineCost(BasicBlock *BB);
+  static int computeBBInlineCost(BasicBlock *BB, TargetTransformInfo *TTI);
 
   std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
   std::unique_ptr<FunctionOutliningMultiRegionInfo>
@@ -448,9 +452,10 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
 
   // Use the same computeBBInlineCost function to compute the cost savings of
   // the outlining the candidate region.
+  TargetTransformInfo *FTTI = &GetTTI(*F);
   int OverallFunctionCost = 0;
   for (auto &BB : *F)
-    OverallFunctionCost += computeBBInlineCost(&BB);
+    OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
 
 #ifndef NDEBUG
   if (TracePartialInlining)
@@ -509,7 +514,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
         continue;
       int OutlineRegionCost = 0;
       for (auto *BB : DominateVector)
-        OutlineRegionCost += computeBBInlineCost(BB);
+        OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
 
 #ifndef NDEBUG
       if (TracePartialInlining)
@@ -843,7 +848,8 @@ bool PartialInlinerImpl::shouldPartialInline(
 // TODO: Ideally  we should share Inliner's InlineCost Analysis code.
 // For now use a simplified version. The returned 'InlineCost' will be used
 // to esimate the size cost as well as runtime cost of the BB.
-int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
+int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
+                                            TargetTransformInfo *TTI) {
   int InlineCost = 0;
   const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
   for (Instruction &I : BB->instructionsWithoutDebug()) {
@@ -866,6 +872,21 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
     if (I.isLifetimeStartOrEnd())
       continue;
 
+    if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+      Intrinsic::ID IID = II->getIntrinsicID();
+      SmallVector<Type *, 4> Tys;
+      FastMathFlags FMF;
+      for (Value *Val : II->args())
+        Tys.push_back(Val->getType());
+
+      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
+        FMF = FPMO->getFastMathFlags();
+
+      IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF);
+      InlineCost += TTI->getIntrinsicInstrCost(ICA, TTI::TCK_SizeAndLatency);
+      continue;
+    }
+
     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
       InlineCost += getCallsiteCost(*CI, DL);
       continue;
@@ -893,11 +914,13 @@ PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
     BasicBlock* OutliningCallBB = FuncBBPair.second;
     // Now compute the cost of the call sequence to the outlined function
     // 'OutlinedFunction' in BB 'OutliningCallBB':
-    OutliningFuncCallCost += computeBBInlineCost(OutliningCallBB);
+    auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
+    OutliningFuncCallCost +=
+        computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
 
     // Now compute the cost of the extracted/outlined function itself:
     for (BasicBlock &BB : *OutlinedFunc)
-      OutlinedFunctionCost += computeBBInlineCost(&BB);
+      OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
   }
   assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
          "Outlined function cost should be no less than the outlined region");
@@ -962,8 +985,9 @@ void PartialInlinerImpl::computeCallsiteToProfCountMap(
 
 PartialInlinerImpl::FunctionCloner::FunctionCloner(
     Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
-    function_ref<AssumptionCache *(Function &)> LookupAC)
-    : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
+    function_ref<AssumptionCache *(Function &)> LookupAC,
+    function_ref<TargetTransformInfo &(Function &)> GetTTI)
+    : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
   ClonedOI = std::make_unique<FunctionOutliningInfo>();
 
   // Clone the function, so that we can hack away on it.
@@ -987,8 +1011,9 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner(
 PartialInlinerImpl::FunctionCloner::FunctionCloner(
     Function *F, FunctionOutliningMultiRegionInfo *OI,
     OptimizationRemarkEmitter &ORE,
-    function_ref<AssumptionCache *(Function &)> LookupAC)
-    : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
+    function_ref<AssumptionCache *(Function &)> LookupAC,
+    function_ref<TargetTransformInfo &(Function &)> GetTTI)
+    : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
   ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
 
   // Clone the function, so that we can hack away on it.
@@ -1099,10 +1124,10 @@ void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
 
 bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
 
-  auto ComputeRegionCost = [](SmallVectorImpl<BasicBlock *> &Region) {
+  auto ComputeRegionCost = [&](SmallVectorImpl<BasicBlock *> &Region) {
     int Cost = 0;
     for (BasicBlock* BB : Region)
-      Cost += computeBBInlineCost(BB);
+      Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
     return Cost;
   };
 
@@ -1196,9 +1221,10 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
 
   // Gather up the blocks that we're going to extract.
   std::vector<BasicBlock *> ToExtract;
+  auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
   ToExtract.push_back(ClonedOI->NonReturnBlock);
-  OutlinedRegionCost +=
-      PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
+  OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
+      ClonedOI->NonReturnBlock, ClonedFuncTTI);
   for (BasicBlock &BB : *ClonedFunc)
     if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
       ToExtract.push_back(&BB);
@@ -1206,7 +1232,7 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
       // into the outlined function which may make the outlining
       // overhead (the difference of the outlined function cost
       // and OutliningRegionCost) look larger.
-      OutlinedRegionCost += computeBBInlineCost(&BB);
+      OutlinedRegionCost += computeBBInlineCost(&BB, ClonedFuncTTI);
     }
 
   // Extract the body of the if.
@@ -1276,7 +1302,7 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
     std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
         computeOutliningColdRegionsInfo(F, ORE);
     if (OMRI) {
-      FunctionCloner Cloner(F, OMRI.get(), ORE, LookupAssumptionCache);
+      FunctionCloner Cloner(F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
 
 #ifndef NDEBUG
       if (TracePartialInlining) {
@@ -1309,7 +1335,7 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
   if (!OI)
     return {false, nullptr};
 
-  FunctionCloner Cloner(F, OI.get(), ORE, LookupAssumptionCache);
+  FunctionCloner Cloner(F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
   Cloner.NormalizeReturnBlock();
 
   Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();

diff --git a/llvm/test/Transforms/PartialInlining/intrinsic-call-cost.ll b/llvm/test/Transforms/PartialInlining/intrinsic-call-cost.ll
new file mode 100644
index 000000000000..8f5a92df8407
--- /dev/null
+++ b/llvm/test/Transforms/PartialInlining/intrinsic-call-cost.ll
@@ -0,0 +1,55 @@
+; RUN: opt -partial-inliner -S < %s | FileCheck %s
+
+; Checks that valid costs are computed for intrinsic calls.
+; https://bugs.llvm.org/show_bug.cgi?id=45932
+
+
+@emit_notes = external global i8, align 2
+
+; CHECK: var_reg_delete
+; CHECK-NEXT: bb
+; CHECK-NEXT: tail call void @delete_variable_part()
+; CHECK-NEXT: ret void
+
+define void @var_reg_delete() {
+bb:
+  tail call void @delete_variable_part()
+  ret void
+}
+
+; CHECK: delete_variable_part
+; CHECK-NEXT: bb
+; CHECK-NEXT: %tmp1.i = tail call i32 @find_variable_location_part()
+; CHECK-NEXT: %tmp3.i = icmp sgt i32 %tmp1.i, -1
+; CHECK-NEXT: br i1 %tmp3.i, label %bb4.i, label %delete_slot_part.exit
+
+; CHECK: bb4.i
+; CHECK-NEXT: %tmp.i.i = load i8, i8* @emit_notes
+; CHECK-NEXT:   %tmp1.i.i = icmp ne i8 %tmp.i.i, 0
+; CHECK-NEXT:  tail call void @llvm.assume(i1 %tmp1.i.i)
+; CHECK-NEXT:  unreachable
+
+; CHECK: delete_slot_part.exit
+; CHECK-NEXT: ret void
+
+define void @delete_variable_part() {
+bb:
+  %tmp1.i = tail call i32 @find_variable_location_part()
+  %tmp3.i = icmp sgt i32 %tmp1.i, -1
+  br i1 %tmp3.i, label %bb4.i, label %delete_slot_part.exit
+
+bb4.i:
+  %tmp.i.i = load i8, i8* @emit_notes, align 2
+  %tmp1.i.i = icmp ne i8 %tmp.i.i, 0
+  tail call void @llvm.assume(i1 %tmp1.i.i)
+  unreachable
+
+delete_slot_part.exit:
+  ret void
+}
+
+; CHECK: declare i32 @find_variable_location_part
+declare i32 @find_variable_location_part()
+
+; CHECK: declare void @llvm.assume(i1 noundef)
+declare void @llvm.assume(i1 noundef)


        


More information about the llvm-commits mailing list