[llvm] 3f96f7b - Fix getInlineCost with ComputeFullInlineCost enabled
Artur Pilipenko via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 14 17:48:30 PDT 2021
Author: Artur Pilipenko
Date: 2021-10-14T17:41:41-07:00
New Revision: 3f96f7b30c91b912de1c6c7c03ab6a4c18e8aa26
URL: https://github.com/llvm/llvm-project/commit/3f96f7b30c91b912de1c6c7c03ab6a4c18e8aa26
DIFF: https://github.com/llvm/llvm-project/commit/3f96f7b30c91b912de1c6c7c03ab6a4c18e8aa26.diff
LOG: Fix getInlineCost with ComputeFullInlineCost enabled
Fix a bug where getInlineCost, with ComputeFullInlineCost enabled, incorrectly
returns a cost/threshold pair instead of an explicit "never inline" result.
Reviewed By: mtrofin
Differential Revision: https://reviews.llvm.org/D111687
Added:
llvm/test/Transforms/Inline/noduplicate.ll
llvm/test/Transforms/SampleProfile/inline-noduplicate.ll
Modified:
llvm/lib/Analysis/InlineCost.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 7ffe2aac689d0..4b5c1bf46a628 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -545,6 +545,9 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
// sense that it's not weighted by profile counts at all.
int ColdSize = 0;
+ // Whether inlining is decided by cost-threshold analysis.
+ bool DecidedByCostThreshold = false;
+
// Whether inlining is decided by cost-benefit analysis.
bool DecidedByCostBenefit = false;
@@ -914,14 +917,24 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
return InlineResult::failure("Cost over threshold.");
}
- if (IgnoreThreshold || Cost < std::max(1, Threshold))
+ if (IgnoreThreshold)
return InlineResult::success();
- return InlineResult::failure("Cost over threshold.");
+
+ DecidedByCostThreshold = true;
+ return Cost < std::max(1, Threshold)
+ ? InlineResult::success()
+ : InlineResult::failure("Cost over threshold.");
}
+
bool shouldStop() override {
+ if (IgnoreThreshold || ComputeFullInlineCost)
+ return false;
// Bail out the moment we cross the threshold. This means we'll under-count
// the cost, but only when undercounting doesn't matter.
- return !IgnoreThreshold && Cost >= Threshold && !ComputeFullInlineCost;
+ if (Cost < Threshold)
+ return false;
+ DecidedByCostThreshold = true;
+ return true;
}
void onLoadEliminationOpportunity() override {
@@ -1013,6 +1026,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
int getCost() const { return Cost; }
Optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; }
bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; }
+ bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; }
};
class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
@@ -2934,13 +2948,13 @@ InlineCost llvm::getInlineCost(
return InlineCost::getNever("cost over benefit", CA.getCostBenefitPair());
}
- // Check if there was a reason to force inlining or no inlining.
- if (!ShouldInline.isSuccess() && CA.getCost() < CA.getThreshold())
- return InlineCost::getNever(ShouldInline.getFailureReason());
- if (ShouldInline.isSuccess() && CA.getCost() >= CA.getThreshold())
- return InlineCost::getAlways("empty function");
+ if (CA.wasDecidedByCostThreshold())
+ return InlineCost::get(CA.getCost(), CA.getThreshold());
- return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
+ // No details on how the decision was made, simply return always or never.
+ return ShouldInline.isSuccess()
+ ? InlineCost::getAlways("empty function")
+ : InlineCost::getNever(ShouldInline.getFailureReason());
}
InlineResult llvm::isInlineViable(Function &F) {
diff --git a/llvm/test/Transforms/Inline/noduplicate.ll b/llvm/test/Transforms/Inline/noduplicate.ll
new file mode 100644
index 0000000000000..b806488817d48
--- /dev/null
+++ b/llvm/test/Transforms/Inline/noduplicate.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -passes=inline -pass-remarks-missed=inline -inline-cost-full -S 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=inline -pass-remarks-missed=inline -S 2>&1 | FileCheck %s
+
+declare void @foo()
+declare void @bar()
+
+define void @callee() {
+entry:
+ call void @foo() noduplicate
+ ; Just to inflate the cost
+ call void @bar() "call-inline-cost"="1000"
+ ret void
+}
+
+define void @caller() {
+; CHECK: 'callee' not inlined into 'caller' because it should never be inlined (cost=never): noduplicate
+; CHECK: define void @caller()
+; CHECK-NEXT: call void @callee()
+; CHECK-NEXT: ret void
+ call void @callee()
+ ret void
+}
diff --git a/llvm/test/Transforms/SampleProfile/inline-noduplicate.ll b/llvm/test/Transforms/SampleProfile/inline-noduplicate.ll
new file mode 100644
index 0000000000000..16e4477262337
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/inline-noduplicate.ll
@@ -0,0 +1,104 @@
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.prof -S | FileCheck %s
+
+; Similar to inline.ll test, but the callee contains a noduplicate instruction.
+
+@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
+
+declare void @foo()
+declare void @bar()
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !4 {
+entry:
+ call void @foo() noduplicate
+ ; Just to inflate the cost
+ call void @bar() "call-inline-cost"="1000"
+ %x.addr = alloca i32, align 4
+ %y.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ store i32 %y, i32* %y.addr, align 4
+ %0 = load i32, i32* %x.addr, align 4, !dbg !11
+ %1 = load i32, i32* %y.addr, align 4, !dbg !11
+ %add = add nsw i32 %0, %1, !dbg !11
+ ret i32 %add, !dbg !11
+}
+
+; Function Attrs: uwtable
+define i32 @main() #0 !dbg !7 {
+entry:
+ %retval = alloca i32, align 4
+ %s = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 0, i32* %retval
+ store i32 0, i32* %i, align 4, !dbg !12
+ br label %while.cond, !dbg !13
+
+while.cond: ; preds = %if.end, %entry
+ %0 = load i32, i32* %i, align 4, !dbg !14
+ %inc = add nsw i32 %0, 1, !dbg !14
+ store i32 %inc, i32* %i, align 4, !dbg !14
+ %cmp = icmp slt i32 %0, 400000000, !dbg !14
+ br i1 %cmp, label %while.body, label %while.end, !dbg !14
+
+while.body: ; preds = %while.cond
+ %1 = load i32, i32* %i, align 4, !dbg !16
+ %cmp1 = icmp ne i32 %1, 100, !dbg !16
+ br i1 %cmp1, label %if.then, label %if.else, !dbg !16
+
+
+if.then: ; preds = %while.body
+ %2 = load i32, i32* %i, align 4, !dbg !18
+ %3 = load i32, i32* %s, align 4, !dbg !18
+ %call = call i32 @_Z3sumii(i32 %2, i32 %3), !dbg !18
+; _Z3sumii should not be inlined because of the noduplicate call to foo.
+; CHECK: call i32 @_Z3sumii
+; CHECK-NOT: call void @foo
+ store i32 %call, i32* %s, align 4, !dbg !18
+ br label %if.end, !dbg !18
+
+if.else: ; preds = %while.body
+ store i32 30, i32* %s, align 4, !dbg !20
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ br label %while.cond, !dbg !22
+
+while.end: ; preds = %while.cond
+ %4 = load i32, i32* %s, align 4, !dbg !24
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %4), !dbg !24
+ ret i32 0, !dbg !25
+}
+
+declare i32 @printf(i8*, ...) #2
+
+attributes #0 = { "use-sample-profile" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "calls.cc", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "sum", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "calls.cc", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.5 "}
+!11 = !DILocation(line: 4, scope: !4)
+!12 = !DILocation(line: 8, scope: !7)
+!13 = !DILocation(line: 9, scope: !7)
+!14 = !DILocation(line: 9, scope: !15)
+!15 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !7)
+!16 = !DILocation(line: 10, scope: !17)
+!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7)
+!18 = !DILocation(line: 10, scope: !19)
+!19 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !17)
+!20 = !DILocation(line: 10, scope: !21)
+!21 = !DILexicalBlockFile(discriminator: 4, file: !1, scope: !17)
+!22 = !DILocation(line: 10, scope: !23)
+!23 = !DILexicalBlockFile(discriminator: 6, file: !1, scope: !17)
+!24 = !DILocation(line: 11, scope: !7)
+!25 = !DILocation(line: 12, scope: !7)
More information about the llvm-commits
mailing list