[llvm] 3f96f7b - Fix getInlineCost with ComputeFullInlineCost enabled

Artur Pilipenko via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 14 17:48:30 PDT 2021


Author: Artur Pilipenko
Date: 2021-10-14T17:41:41-07:00
New Revision: 3f96f7b30c91b912de1c6c7c03ab6a4c18e8aa26

URL: https://github.com/llvm/llvm-project/commit/3f96f7b30c91b912de1c6c7c03ab6a4c18e8aa26
DIFF: https://github.com/llvm/llvm-project/commit/3f96f7b30c91b912de1c6c7c03ab6a4c18e8aa26.diff

LOG: Fix getInlineCost with ComputeFullInlineCost enabled

Fix a bug where getInlineCost, with ComputeFullInlineCost enabled, incorrectly
returns a cost/threshold pair instead of an explicit "never inline" result.

Reviewed By: mtrofin
Differential Revision: https://reviews.llvm.org/D111687

Added: 
    llvm/test/Transforms/Inline/noduplicate.ll
    llvm/test/Transforms/SampleProfile/inline-noduplicate.ll

Modified: 
    llvm/lib/Analysis/InlineCost.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 7ffe2aac689d0..4b5c1bf46a628 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -545,6 +545,9 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
   // sense that it's not weighted by profile counts at all.
   int ColdSize = 0;
 
+  // Whether inlining is decided by cost-threshold analysis.
+  bool DecidedByCostThreshold = false;
+
   // Whether inlining is decided by cost-benefit analysis.
   bool DecidedByCostBenefit = false;
 
@@ -914,14 +917,24 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
         return InlineResult::failure("Cost over threshold.");
     }
 
-    if (IgnoreThreshold || Cost < std::max(1, Threshold))
+    if (IgnoreThreshold)
       return InlineResult::success();
-    return InlineResult::failure("Cost over threshold.");
+
+    DecidedByCostThreshold = true;
+    return Cost < std::max(1, Threshold)
+               ? InlineResult::success()
+               : InlineResult::failure("Cost over threshold.");
   }
+
   bool shouldStop() override {
+    if (IgnoreThreshold || ComputeFullInlineCost)
+      return false;
     // Bail out the moment we cross the threshold. This means we'll under-count
     // the cost, but only when undercounting doesn't matter.
-    return !IgnoreThreshold && Cost >= Threshold && !ComputeFullInlineCost;
+    if (Cost < Threshold)
+      return false;
+    DecidedByCostThreshold = true;
+    return true;
   }
 
   void onLoadEliminationOpportunity() override {
@@ -1013,6 +1026,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
   int getCost() const { return Cost; }
   Optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; }
   bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; }
+  bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; }
 };
 
 class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
@@ -2934,13 +2948,13 @@ InlineCost llvm::getInlineCost(
       return InlineCost::getNever("cost over benefit", CA.getCostBenefitPair());
   }
 
-  // Check if there was a reason to force inlining or no inlining.
-  if (!ShouldInline.isSuccess() && CA.getCost() < CA.getThreshold())
-    return InlineCost::getNever(ShouldInline.getFailureReason());
-  if (ShouldInline.isSuccess() && CA.getCost() >= CA.getThreshold())
-    return InlineCost::getAlways("empty function");
+  if (CA.wasDecidedByCostThreshold())
+    return InlineCost::get(CA.getCost(), CA.getThreshold());
 
-  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
+  // No details on how the decision was made, simply return always or never.
+  return ShouldInline.isSuccess()
+             ? InlineCost::getAlways("empty function")
+             : InlineCost::getNever(ShouldInline.getFailureReason());
 }
 
 InlineResult llvm::isInlineViable(Function &F) {

diff  --git a/llvm/test/Transforms/Inline/noduplicate.ll b/llvm/test/Transforms/Inline/noduplicate.ll
new file mode 100644
index 0000000000000..b806488817d48
--- /dev/null
+++ b/llvm/test/Transforms/Inline/noduplicate.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -passes=inline -pass-remarks-missed=inline -inline-cost-full -S 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=inline -pass-remarks-missed=inline -S 2>&1 | FileCheck %s
+
+declare void @foo()
+declare void @bar()
+
+define void @callee() {
+entry:
+  call void @foo() noduplicate
+  ; Just to inflate the cost
+  call void @bar() "call-inline-cost"="1000"
+  ret void
+}
+
+define void @caller() {
+; CHECK: 'callee' not inlined into 'caller' because it should never be inlined (cost=never): noduplicate
+; CHECK: define void @caller()
+; CHECK-NEXT: call void @callee()
+; CHECK-NEXT: ret void
+  call void @callee()
+  ret void
+}

diff  --git a/llvm/test/Transforms/SampleProfile/inline-noduplicate.ll b/llvm/test/Transforms/SampleProfile/inline-noduplicate.ll
new file mode 100644
index 0000000000000..16e4477262337
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/inline-noduplicate.ll
@@ -0,0 +1,104 @@
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.prof -S | FileCheck %s
+
+; Similar to inline.ll test, but the callee contains a noduplicate instruction.
+
+@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
+
+declare void @foo()
+declare void @bar()
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !4 {
+entry:
+  call void @foo() noduplicate
+  ; Just to inflate the cost
+  call void @bar() "call-inline-cost"="1000"
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %0 = load i32, i32* %x.addr, align 4, !dbg !11
+  %1 = load i32, i32* %y.addr, align 4, !dbg !11
+  %add = add nsw i32 %0, %1, !dbg !11
+  ret i32 %add, !dbg !11
+}
+
+; Function Attrs: uwtable
+define i32 @main() #0 !dbg !7 {
+entry:
+  %retval = alloca i32, align 4
+  %s = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %i, align 4, !dbg !12
+  br label %while.cond, !dbg !13
+
+while.cond:                                       ; preds = %if.end, %entry
+  %0 = load i32, i32* %i, align 4, !dbg !14
+  %inc = add nsw i32 %0, 1, !dbg !14
+  store i32 %inc, i32* %i, align 4, !dbg !14
+  %cmp = icmp slt i32 %0, 400000000, !dbg !14
+  br i1 %cmp, label %while.body, label %while.end, !dbg !14
+
+while.body:                                       ; preds = %while.cond
+  %1 = load i32, i32* %i, align 4, !dbg !16
+  %cmp1 = icmp ne i32 %1, 100, !dbg !16
+  br i1 %cmp1, label %if.then, label %if.else, !dbg !16
+
+
+if.then:                                          ; preds = %while.body
+  %2 = load i32, i32* %i, align 4, !dbg !18
+  %3 = load i32, i32* %s, align 4, !dbg !18
+  %call = call i32 @_Z3sumii(i32 %2, i32 %3), !dbg !18
+; _Z3sumii should not be inlined because of the noduplicate call to foo.
+; CHECK: call i32 @_Z3sumii
+; CHECK-NOT: call void @foo
+  store i32 %call, i32* %s, align 4, !dbg !18
+  br label %if.end, !dbg !18
+
+if.else:                                          ; preds = %while.body
+  store i32 30, i32* %s, align 4, !dbg !20
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  br label %while.cond, !dbg !22
+
+while.end:                                        ; preds = %while.cond
+  %4 = load i32, i32* %s, align 4, !dbg !24
+  %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %4), !dbg !24
+  ret i32 0, !dbg !25
+}
+
+declare i32 @printf(i8*, ...) #2
+
+attributes #0 = { "use-sample-profile" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "calls.cc", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "sum", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "calls.cc", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.5 "}
+!11 = !DILocation(line: 4, scope: !4)
+!12 = !DILocation(line: 8, scope: !7)
+!13 = !DILocation(line: 9, scope: !7)
+!14 = !DILocation(line: 9, scope: !15)
+!15 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !7)
+!16 = !DILocation(line: 10, scope: !17)
+!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7)
+!18 = !DILocation(line: 10, scope: !19)
+!19 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !17)
+!20 = !DILocation(line: 10, scope: !21)
+!21 = !DILexicalBlockFile(discriminator: 4, file: !1, scope: !17)
+!22 = !DILocation(line: 10, scope: !23)
+!23 = !DILexicalBlockFile(discriminator: 6, file: !1, scope: !17)
+!24 = !DILocation(line: 11, scope: !7)
+!25 = !DILocation(line: 12, scope: !7)


        


More information about the llvm-commits mailing list