[llvm] c965fd9 - Cost Annotation Writer for InlineCost
Artur Pilipenko via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 26 17:04:01 PST 2020
Author: Kirill Naumov
Date: 2020-02-26T17:03:52-08:00
New Revision: c965fd942f1d2de6179cd1a2f78c78fa4bd74626
URL: https://github.com/llvm/llvm-project/commit/c965fd942f1d2de6179cd1a2f78c78fa4bd74626
DIFF: https://github.com/llvm/llvm-project/commit/c965fd942f1d2de6179cd1a2f78c78fa4bd74626.diff
LOG: Cost Annotation Writer for InlineCost
Add extra diagnostics for the inline cost analysis under the
-print-instruction-deltas cl option. When enabled along with
-debug-only=inline-cost, it prints the IR of the inline candidate
annotated with the cost and threshold change for every instruction.
Reviewed By: apilipenko, davidxl, mtrofin
Differential Revision: https://reviews.llvm.org/D71501
Added:
llvm/test/Transforms/Inline/debuginline-cost-delta.ll
Modified:
llvm/lib/Analysis/InlineCost.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 98ece45cbc13..41824524395e 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -38,6 +39,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -51,6 +53,10 @@ static cl::opt<int>
cl::ZeroOrMore,
cl::desc("Default amount of inlining to perform"));
+static cl::opt<bool> PrintDebugInstructionDeltas("print-instruction-deltas",
+ cl::Hidden, cl::init(false),
+ cl::desc("Prints deltas of cost and threshold per instruction"));
+
static cl::opt<int> InlineThreshold(
"inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
cl::desc("Control the amount of inlining to perform (default = 225)"));
@@ -99,6 +105,26 @@ static cl::opt<bool> OptComputeFullInlineCost(
namespace {
class InlineCostCallAnalyzer;
+
+// Per-instruction record of the inline cost and threshold values captured
+// immediately before and after the instruction is analyzed.
+struct InstructionCostDetail {
+ int CostBefore; // Cost when analysis of the instruction starts.
+ int CostAfter; // Cost once analysis of the instruction has finished.
+ int ThresholdBefore; // Threshold when analysis of the instruction starts.
+ int ThresholdAfter; // Threshold once analysis of the instruction has finished.
+};
+
+class CostAnnotationWriter : public AssemblyAnnotationWriter {
+public:
+ // Maps each instruction to the cost and threshold recorded before and
+ // after analyzing it; the printed deltas are derived from these values.
+ DenseMap <const Instruction *, InstructionCostDetail> CostThresholdMap;
+
+ virtual void emitInstructionAnnot(const Instruction *I,
+ formatted_raw_ostream &OS);
+};
+
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
typedef InstVisitor<CallAnalyzer, bool> Base;
friend class InstVisitor<CallAnalyzer, bool>;
@@ -135,6 +161,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// Called after a basic block was analyzed.
virtual void onBlockAnalyzed(const BasicBlock *BB) {}
+ /// Called before an instruction is analyzed. Does nothing by default.
+ virtual void onInstructionAnalysisStart(const Instruction *I) {}
+
+ /// Called after an instruction has been analyzed. Does nothing by default.
+ virtual void onInstructionAnalysisFinish(const Instruction *I) {}
+
/// Called at the end of the analysis of the callsite. Return the outcome of
/// the analysis, i.e. 'InlineResult(true)' if the inlining may happen, or
/// the reason it can't.
@@ -538,6 +570,24 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
}
}
+ void onInstructionAnalysisStart(const Instruction *I) override {
+ // Snapshot the current cost and threshold before the given instruction is
+ // assessed. Nothing is recorded unless -print-instruction-deltas is set.
+ if (!PrintDebugInstructionDeltas)
+ return ;
+ Writer.CostThresholdMap[I].CostBefore = Cost;
+ Writer.CostThresholdMap[I].ThresholdBefore = Threshold;
+ }
+
+ void onInstructionAnalysisFinish(const Instruction *I) override {
+ // Snapshot the cost and threshold once the given instruction has been
+ // assessed. Nothing is recorded unless -print-instruction-deltas is set.
+ if (!PrintDebugInstructionDeltas)
+ return ;
+ Writer.CostThresholdMap[I].CostAfter = Cost;
+ Writer.CostThresholdMap[I].ThresholdAfter = Threshold;
+ }
+
InlineResult finalizeAnalysis() override {
// Loops generally act a lot like calls in that they act like barriers to
// movement, require a certain amount of setup, etc. So when optimising for
@@ -637,6 +687,10 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
Params.ComputeFullInlineCost || ORE),
Params(Params), Threshold(Params.DefaultThreshold),
BoostIndirectCalls(BoostIndirect) {}
+
+ /// Annotation Writer for cost annotation
+ CostAnnotationWriter Writer;
+
void dump();
virtual ~InlineCostCallAnalyzer() {}
@@ -655,6 +709,25 @@ void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
EnabledSROAAllocas.erase(SROAArg);
disableLoadElimination();
}
+
+void CostAnnotationWriter::emitInstructionAnnot(
+ const Instruction *I, formatted_raw_ostream &OS) {
+ // The before/after cost and threshold values and the cost delta are always
+ // printed. The threshold delta is printed only when it is non-zero, which
+ // happens when we decided to give a bonus at a particular instruction.
+ OS << "; cost before = " << CostThresholdMap[I].CostBefore <<
+ ", cost after = " << CostThresholdMap[I].CostAfter <<
+ ", threshold before = " << CostThresholdMap[I].ThresholdBefore <<
+ ", threshold after = " << CostThresholdMap[I].ThresholdAfter <<
+ ", ";
+ OS << "cost delta = " << CostThresholdMap[I].CostAfter -
+ CostThresholdMap[I].CostBefore;
+ if (CostThresholdMap[I].ThresholdAfter != CostThresholdMap[I].ThresholdBefore)
+ OS << ", threshold delta = " << CostThresholdMap[I].ThresholdAfter -
+ CostThresholdMap[I].ThresholdBefore;
+ OS << "\n";
+}
+
/// If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
@@ -1763,11 +1836,14 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
// all of the per-instruction logic. The visit tree returns true if we
// consumed the instruction in any way, and false if the instruction's base
// cost should count against inlining.
+ onInstructionAnalysisStart(&*I);
+
if (Base::visit(&*I))
++NumInstructionsSimplified;
else
onMissedSimplification();
+ onInstructionAnalysisFinish(&*I);
using namespace ore;
// If the visit this instruction detected an uninlinable pattern, abort.
InlineResult IR = InlineResult::success();
@@ -2049,6 +2125,8 @@ InlineResult CallAnalyzer::analyze() {
/// Dump stats about this call's analysis.
LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() {
#define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
+ if (PrintDebugInstructionDeltas)
+ F.print(dbgs(), &Writer);
DEBUG_PRINT_STAT(NumConstantArgs);
DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
DEBUG_PRINT_STAT(NumAllocaArgs);
diff --git a/llvm/test/Transforms/Inline/debuginline-cost-delta.ll b/llvm/test/Transforms/Inline/debuginline-cost-delta.ll
new file mode 100644
index 000000000000..a352c66833c6
--- /dev/null
+++ b/llvm/test/Transforms/Inline/debuginline-cost-delta.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -inline -debug-only=inline-cost -disable-output -print-instruction-deltas 2>&1 | FileCheck %s
+
+; CHECK: Analyzing call of callee1... (caller:foo)
+; CHECK: define i32 @callee1(i32 %x) {
+; CHECK: ; cost before = {{.*}}, cost after = {{.*}}, threshold before = {{.*}}, threshold after = {{.*}}, cost delta = 5
+; CHECK: %x1 = add i32 %x, 1
+; CHECK: ; cost before = {{.*}}, cost after = {{.*}}, threshold before = {{.*}}, threshold after = {{.*}}, cost delta = 5
+; CHECK: %x2 = add i32 %x1, 1
+; CHECK: ; cost before = {{.*}}, cost after = {{.*}}, threshold before = {{.*}}, threshold after = {{.*}}, cost delta = 5
+; CHECK: %x3 = add i32 %x2, 1
+; CHECK: ; cost before = {{.*}}, cost after = {{.*}}, threshold before = {{.*}}, threshold after = {{.*}}, cost delta = 0
+; CHECK: ret i32 %x3
+; CHECK: }
+; CHECK: NumConstantArgs: 0
+; CHECK: NumConstantOffsetPtrArgs: 0
+; CHECK: NumAllocaArgs: 0
+; CHECK: NumConstantPtrCmps: 0
+; CHECK: NumConstantPtrDiffs: 0
+; CHECK: NumInstructionsSimplified: 1
+; CHECK: NumInstructions: 4
+; CHECK: SROACostSavings: 0
+; CHECK: SROACostSavingsLost: 0
+; CHECK: LoadEliminationCost: 0
+; CHECK: ContainsNoDuplicateCall: 0
+; CHECK: Cost: {{.*}}
+; CHECK: Threshold: {{.*}}
+
+define i32 @foo(i32 %y) {
+ %x = call i32 @callee1(i32 %y)
+ ret i32 %x
+}
+
+define i32 @callee1(i32 %x) {
+ %x1 = add i32 %x, 1
+ %x2 = add i32 %x1, 1
+ %x3 = add i32 %x2, 1
+ ret i32 %x3
+}
More information about the llvm-commits
mailing list