[llvm] 029946b - [InlineAdvisor] New inliner advisor to replay inlining from optimization remarks

Wenlei He via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 17 13:31:01 PDT 2020


Author: Wenlei He
Date: 2020-07-17T13:30:47-07:00
New Revision: 029946b112684c27b27f7c2d7554f22b33ae1e0b

URL: https://github.com/llvm/llvm-project/commit/029946b112684c27b27f7c2d7554f22b33ae1e0b
DIFF: https://github.com/llvm/llvm-project/commit/029946b112684c27b27f7c2d7554f22b33ae1e0b.diff

LOG: [InlineAdvisor] New inliner advisor to replay inlining from optimization remarks

Summary:
This change adds a new inline advisor that takes optimization remarks from a previous inlining as input and provides the decisions as advice, so that the current inlining can replay the inline decisions of a different compilation. The DWARF inline stack with line and discriminator is used as the anchor for call sites. The change can be useful for inliner tuning.
A switch -sample-profile-inline-replay=<inline_remarks_file> is added to hook up the new inliner advisor with SampleProfileLoader's inline decision for replay. The new inline advisor can also be used by regular CGSCC inliner later if needed.

Reviewers: davidxl, mtrofin, wmi, hoy

Subscribers: aprantl, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D83743

Added: 
    llvm/include/llvm/Analysis/ReplayInlineAdvisor.h
    llvm/lib/Analysis/ReplayInlineAdvisor.cpp
    llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt
    llvm/test/Transforms/SampleProfile/inline-replay.ll

Modified: 
    llvm/include/llvm/Analysis/InlineAdvisor.h
    llvm/lib/Analysis/CMakeLists.txt
    llvm/lib/Analysis/InlineAdvisor.cpp
    llvm/lib/Transforms/IPO/SampleProfile.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h
index 3480d93385a8..a0ff09679dfe 100644
--- a/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -226,6 +226,9 @@ void emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc,
                      bool ForProfileContext = false,
                      const char *PassName = nullptr);
 
+/// Format DLoc's inline stack as "name:offset[.discriminator] @ ..." text.
+StringRef getCallSiteLocation(DebugLoc DLoc);
+
 /// Add location info to ORE message.
 void addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc);
 

diff  --git a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h
new file mode 100644
index 000000000000..e312d59a9f87
--- /dev/null
+++ b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h
@@ -0,0 +1,37 @@
+//===- ReplayInlineAdvisor.h - Replay Inline Advisor interface -*- C++ --*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+#ifndef LLVM_REPLAYINLINEADVISOR_H_
+#define LLVM_REPLAYINLINEADVISOR_H_
+
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Analysis/InlineAdvisor.h"
+#include "llvm/IR/LLVMContext.h"
+
+namespace llvm {
+class BasicBlock;
+class CallBase;
+class Function;
+class Module;
+class OptimizationRemarkEmitter;
+
+/// Replay inline advisor that uses optimization remarks from the inlining of a
+/// previous build to guide the current inlining. Useful for inliner tuning.
+class ReplayInlineAdvisor : public InlineAdvisor {
+public:
+  ReplayInlineAdvisor(FunctionAnalysisManager &FAM, LLVMContext &Context,
+                      StringRef RemarksFile);
+  std::unique_ptr<InlineAdvice> getAdvice(CallBase &CB) override;
+  bool areReplayRemarksLoaded() const { return HasReplayRemarks; }
+
+private:
+  StringSet<> InlineSitesFromRemarks; // Text after " at callsite " per remark.
+  bool HasReplayRemarks = false; // True once the remarks file has been read.
+};
+} // namespace llvm
+#endif // LLVM_REPLAYINLINEADVISOR_H_
\ No newline at end of file

diff  --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
index 703623396d96..8f10bac588e5 100644
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -117,6 +117,7 @@ add_llvm_component_library(LLVMAnalysis
   RegionInfo.cpp
   RegionPass.cpp
   RegionPrinter.cpp
+  ReplayInlineAdvisor.cpp
   ScalarEvolution.cpp
   ScalarEvolutionAliasAnalysis.cpp
   ScalarEvolutionDivision.cpp

diff  --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp
index e18f681278d3..fedc5282ee64 100644
--- a/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -365,6 +365,31 @@ llvm::shouldInline(CallBase &CB,
   return IC;
 }
 
+StringRef llvm::getCallSiteLocation(DebugLoc DLoc) { // "name:offset[.disc]" per frame, joined by " @ ".
+  std::ostringstream CallSiteLoc;
+  bool First = true;
+  for (DILocation *DIL = DLoc.get(); DIL; DIL = DIL->getInlinedAt()) { // Follow the inlinedAt chain.
+    if (!First)
+      CallSiteLoc << " @ ";
+    // Note that a negative line offset is actually possible, but we use an
+    // unsigned int to match the line offset representation in remarks so
+    // it's directly consumable by the replay advisor.
+    uint32_t Offset =
+        DIL->getLine() - DIL->getScope()->getSubprogram()->getLine();
+    uint32_t Discriminator = DIL->getBaseDiscriminator();
+    StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
+    if (Name.empty())
+      Name = DIL->getScope()->getSubprogram()->getName(); // No linkage name: use the plain name.
+    CallSiteLoc << Name.str() << ":" << llvm::utostr(Offset);
+    if (Discriminator) {
+      CallSiteLoc << "." << llvm::utostr(Discriminator);
+    }
+    First = false;
+  }
+
+  return CallSiteLoc.str(); // NOTE(review): str() yields a temporary std::string, so the returned StringRef appears to dangle; confirm and consider returning std::string.
+}
+
 void llvm::addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc) {
   if (!DLoc.get())
     return;

diff  --git a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
new file mode 100644
index 000000000000..c12b58021a60
--- /dev/null
+++ b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
@@ -0,0 +1,61 @@
+//===- ReplayInlineAdvisor.cpp - Replay InlineAdvisor ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ReplayInlineAdvisor that replays inline decision based
+// on previous inline remarks from optimization remark log.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/InlineAdvisor.h"
+#include "llvm/Analysis/ReplayInlineAdvisor.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/LineIterator.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "inline-replay"
+
+ReplayInlineAdvisor::ReplayInlineAdvisor(FunctionAnalysisManager &FAM,
+                                         LLVMContext &Context,
+                                         StringRef RemarksFile)
+    : InlineAdvisor(FAM), HasReplayRemarks(false) {
+  auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(RemarksFile);
+  std::error_code EC = BufferOrErr.getError();
+  if (EC) {
+    Context.emitError("Could not open remarks file: " + EC.message());
+    return; // HasReplayRemarks stays false so the caller can detect the failure.
+  }
+
+  // Example of an inline remark to parse:
+  //   _Z3subii inlined into main [details] at callsite _Z3sumii:1 @ main:3.1
+  // The call site string after ` at callsite ` is recorded for replay.
+  line_iterator LineIt(*BufferOrErr.get(), /*SkipBlanks=*/true);
+  for (; !LineIt.is_at_eof(); ++LineIt) {
+    StringRef Line = *LineIt;
+    auto Pair = Line.split(" at callsite ");
+    if (Pair.second.empty())
+      continue; // No text after " at callsite "; skip this line.
+    InlineSitesFromRemarks.insert(Pair.second);
+  }
+  HasReplayRemarks = true; // Set even when no call sites were recorded.
+}
+
+std::unique_ptr<InlineAdvice> ReplayInlineAdvisor::getAdvice(CallBase &CB) {
+  assert(HasReplayRemarks); // Only valid after the remarks file was read.
+
+  Function &Caller = *CB.getCaller();
+  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);
+
+  if (InlineSitesFromRemarks.empty())
+    return std::make_unique<InlineAdvice>(this, CB, ORE, false); // Nothing recorded: never recommend.
+
+  StringRef CallSiteLoc = getCallSiteLocation(CB.getDebugLoc());
+  bool InlineRecommended = InlineSitesFromRemarks.count(CallSiteLoc) > 0; // Recommend only recorded call sites.
+  return std::make_unique<InlineAdvice>(this, CB, ORE, InlineRecommended);
+}

diff  --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index b6871e260532..7b5fc030cf88 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -43,6 +43,7 @@
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/ReplayInlineAdvisor.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/BasicBlock.h"
@@ -170,6 +171,13 @@ static cl::opt<int> SampleColdCallSiteThreshold(
     "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
     cl::desc("Threshold for inlining cold callsites"));
 
+static cl::opt<std::string> ProfileInlineReplayFile(
+    "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
+    cl::desc(
+        "Optimization remarks file containing inline remarks to be replayed "
+        "by inlining from sample profile loader."),
+    cl::Hidden);
+
 namespace {
 
 using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
@@ -319,7 +327,7 @@ class SampleProfileLoader {
         RemappingFilename(std::string(RemapName)),
         IsThinLTOPreLink(IsThinLTOPreLink) {}
 
-  bool doInitialization(Module &M);
+  bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
   bool runOnModule(Module &M, ModuleAnalysisManager *AM,
                    ProfileSummaryInfo *_PSI, CallGraph *CG);
 
@@ -473,6 +481,9 @@ class SampleProfileLoader {
   // overriden by -profile-sample-accurate or profile-sample-accurate
   // attribute.
   bool ProfAccForSymsInList;
+
+  // External inline advisor used to replay inline decision from remarks.
+  std::unique_ptr<ReplayInlineAdvisor> ExternalInlineAdvisor;
 };
 
 class SampleProfileLoaderLegacyPass : public ModulePass {
@@ -898,6 +909,16 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
 }
 
 bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) {
+  if (ExternalInlineAdvisor) {
+    auto Advice = ExternalInlineAdvisor->getAdvice(CB);
+    if (!Advice->isInliningRecommended()) {
+      Advice->recordUnattemptedInlining();
+      return false;
+    }
+    // Dummy record, we don't use it for replay.
+    Advice->recordInlining();
+  }
+
   Function *CalledFunction = CB.getCalledFunction();
   assert(CalledFunction);
   DebugLoc DLoc = CB.getDebugLoc();
@@ -1005,7 +1026,7 @@ bool SampleProfileLoader::inlineHotFunctions(
           }
         }
       }
-      if (Hot) {
+      if (Hot || ExternalInlineAdvisor) {
         CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
         emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
       } else {
@@ -1818,7 +1839,8 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
   return FunctionOrderList;
 }
 
-bool SampleProfileLoader::doInitialization(Module &M) {
+bool SampleProfileLoader::doInitialization(Module &M,
+                                           FunctionAnalysisManager *FAM) {
   auto &Ctx = M.getContext();
 
   std::unique_ptr<SampleProfileReaderItaniumRemapper> RemapReader;
@@ -1843,6 +1865,13 @@ bool SampleProfileLoader::doInitialization(Module &M) {
       NamesInProfile.insert(NameTable->begin(), NameTable->end());
   }
 
+  if (FAM && !ProfileInlineReplayFile.empty()) {
+    ExternalInlineAdvisor = std::make_unique<ReplayInlineAdvisor>(
+        *FAM, Ctx, ProfileInlineReplayFile);
+    if (!ExternalInlineAdvisor->areReplayRemarksLoaded())
+      ExternalInlineAdvisor.reset();
+  }
+
   return true;
 }
 
@@ -1995,7 +2024,7 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
                                        : ProfileRemappingFileName,
       IsThinLTOPreLink, GetAssumptionCache, GetTTI, GetTLI);
 
-  if (!SampleLoader.doInitialization(M))
+  if (!SampleLoader.doInitialization(M, &FAM))
     return PreservedAnalyses::all();
 
   ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);

diff  --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt b/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt
new file mode 100644
index 000000000000..6842845d5655
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt
@@ -0,0 +1,2 @@
+remark: calls.cc:10:0: _Z3sumii inlined into main to match profiling context with (cost=45, threshold=337) at callsite main:3.1
+remark: calls.cc:4:0: _Z3subii inlined into main to match profiling context with (cost=-5, threshold=337) at callsite _Z3sumii:1 @ main:3.1

diff  --git a/llvm/test/Transforms/SampleProfile/inline-replay.ll b/llvm/test/Transforms/SampleProfile/inline-replay.ll
new file mode 100644
index 000000000000..ecf6f51850f2
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/inline-replay.ll
@@ -0,0 +1,122 @@
+;; Note that this needs new pass manager for now. Passing `-sample-profile-inline-replay` to legacy pass manager is a no-op.
+
+;; Check baseline inline decisions
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown.prof -sample-profile-merge-inlinee -sample-profile-top-down-load -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=DEFAULT %s
+
+;; Check replay inline decisions
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown.prof -sample-profile-inline-replay=%S/Inputs/inline-replay.txt -sample-profile-merge-inlinee -sample-profile-top-down-load -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=REPLAY %s
+
+ at .str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
+
+define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !6 {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4, !dbg !8
+  %tmp1 = load i32, i32* %y.addr, align 4, !dbg !8
+  %add = add nsw i32 %tmp, %tmp1, !dbg !8
+  %tmp2 = load i32, i32* %x.addr, align 4, !dbg !8
+  %tmp3 = load i32, i32* %y.addr, align 4, !dbg !8
+  %call = call i32 @_Z3subii(i32 %tmp2, i32 %tmp3), !dbg !8
+  ret i32 %add, !dbg !8
+}
+
+define i32 @_Z3subii(i32 %x, i32 %y) #0 !dbg !9 {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4, !dbg !10
+  %tmp1 = load i32, i32* %y.addr, align 4, !dbg !10
+  %add = sub nsw i32 %tmp, %tmp1, !dbg !10
+  ret i32 %add, !dbg !11
+}
+
+define i32 @main() #0 !dbg !12 {
+entry:
+  %retval = alloca i32, align 4
+  %s = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %i, align 4, !dbg !13
+  br label %while.cond, !dbg !14
+
+while.cond:                                       ; preds = %if.end, %entry
+  %tmp = load i32, i32* %i, align 4, !dbg !15
+  %inc = add nsw i32 %tmp, 1, !dbg !15
+  store i32 %inc, i32* %i, align 4, !dbg !15
+  %cmp = icmp slt i32 %tmp, 400000000, !dbg !15
+  br i1 %cmp, label %while.body, label %while.end, !dbg !15
+
+while.body:                                       ; preds = %while.cond
+  %tmp1 = load i32, i32* %i, align 4, !dbg !17
+  %cmp1 = icmp ne i32 %tmp1, 100, !dbg !17
+  br i1 %cmp1, label %if.then, label %if.else, !dbg !17
+
+if.then:                                          ; preds = %while.body
+  %tmp2 = load i32, i32* %i, align 4, !dbg !19
+  %tmp3 = load i32, i32* %s, align 4, !dbg !19
+  %call = call i32 @_Z3sumii(i32 %tmp2, i32 %tmp3), !dbg !19
+  store i32 %call, i32* %s, align 4, !dbg !19
+  br label %if.end, !dbg !19
+
+if.else:                                          ; preds = %while.body
+  store i32 30, i32* %s, align 4, !dbg !21
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  br label %while.cond, !dbg !23
+
+while.end:                                        ; preds = %while.cond
+  %tmp4 = load i32, i32* %s, align 4, !dbg !25
+  %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %tmp4), !dbg !25
+  ret i32 0, !dbg !26
+}
+
+declare i32 @printf(i8*, ...)
+
+attributes #0 = { "use-sample-profile" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "calls.cc", directory: ".")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 1, !"Debug Info Version", i32 3}
+!5 = !{!"clang version 3.5 "}
+!6 = distinct !DISubprogram(name: "sum", linkageName: "_Z3sumii", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!7 = !DISubroutineType(types: !2)
+!8 = !DILocation(line: 4, scope: !6)
+!9 = distinct !DISubprogram(name: "sub", linkageName: "_Z3subii", scope: !1, file: !1, line: 20, type: !7, scopeLine: 20, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!10 = !DILocation(line: 20, scope: !9)
+!11 = !DILocation(line: 21, scope: !9)
+!12 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !7, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!13 = !DILocation(line: 8, scope: !12)
+!14 = !DILocation(line: 9, scope: !12)
+!15 = !DILocation(line: 9, scope: !16)
+!16 = !DILexicalBlockFile(scope: !12, file: !1, discriminator: 2)
+!17 = !DILocation(line: 10, scope: !18)
+!18 = distinct !DILexicalBlock(scope: !12, file: !1, line: 10)
+!19 = !DILocation(line: 10, scope: !20)
+!20 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 2)
+!21 = !DILocation(line: 10, scope: !22)
+!22 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 4)
+!23 = !DILocation(line: 10, scope: !24)
+!24 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 6)
+!25 = !DILocation(line: 11, scope: !12)
+!26 = !DILocation(line: 12, scope: !12)
+
+
+; DEFAULT: _Z3sumii inlined into main
+; DEFAULT: _Z3subii inlined into _Z3sumii
+; DEFAULT-NOT: _Z3subii inlined into main 
+
+; REPLAY: _Z3sumii inlined into main
+; REPLAY: _Z3subii inlined into main 
+; REPLAY-NOT: _Z3subii inlined into _Z3sumii


        


More information about the llvm-commits mailing list