[llvm-branch-commits] [llvm] 21b1ad0 - [SampleFDO] Add the support to split the function profiles with context into

Wei Mi via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Jan 19 15:21:15 PST 2021


Author: Wei Mi
Date: 2021-01-19T15:16:19-08:00
New Revision: 21b1ad0340a7ba69c605ea1c218adb567b5190ae

URL: https://github.com/llvm/llvm-project/commit/21b1ad0340a7ba69c605ea1c218adb567b5190ae
DIFF: https://github.com/llvm/llvm-project/commit/21b1ad0340a7ba69c605ea1c218adb567b5190ae.diff

LOG: [SampleFDO] Add the support to split the function profiles with context into
separate sections.

For ThinLTO, all the function profiles without context have already been
annotated to outline functions, where possible, in the prelink phase. In the
postlink phase, profile annotation is therefore only meaningful for function
profiles with context. If the profile is large, it is better to split it into
two parts, one with context and one without, so that profile reading in the
postlink phase only has to read the part with context. To support this profile
splitting, we extend the ExtBinary format to support different section
arrangements. This makes it flexible to add other section layouts in the future
without the need to create a new class inheriting from the ExtBinary class.

Differential Revision: https://reviews.llvm.org/D94435

Added: 
    llvm/test/Transforms/SampleProfile/Inputs/ctxsplit.extbinary.afdo
    llvm/test/Transforms/SampleProfile/ctxsplit.ll

Modified: 
    llvm/include/llvm/ProfileData/SampleProf.h
    llvm/include/llvm/ProfileData/SampleProfReader.h
    llvm/include/llvm/ProfileData/SampleProfWriter.h
    llvm/lib/ProfileData/SampleProfReader.cpp
    llvm/lib/ProfileData/SampleProfWriter.cpp
    llvm/lib/Transforms/IPO/SampleProfile.cpp

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index c423466fe75b..c45ace9e68c1 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -164,7 +164,9 @@ struct SecHdrTableEntry {
 // will be saved in the higher 32 bits.
 enum class SecCommonFlags : uint32_t {
   SecFlagInValid = 0,
-  SecFlagCompress = (1 << 0)
+  SecFlagCompress = (1 << 0),
+  // Indicate the section contains only profile without context.
+  SecFlagFlat = (1 << 1)
 };
 
 // Section specific flags are defined here.

diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index 92fe825beefc..3f52a2f6163b 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -451,6 +451,10 @@ class SampleProfileReader {
   /// Return whether names in the profile are all MD5 numbers.
   virtual bool useMD5() { return false; }
 
+  /// Don't read profile without context if the flag is set. This is only meaningful
+  /// for ExtBinary format.
+  virtual void setSkipFlatProf(bool Skip) {}
+
   SampleProfileReaderItaniumRemapper *getRemapper() { return Remapper.get(); }
 
 protected:
@@ -666,6 +670,10 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
   /// the lifetime of MD5StringBuf is not shorter than that of NameTable.
   std::unique_ptr<std::vector<std::string>> MD5StringBuf;
 
+  /// If SkipFlatProf is true, skip the sections with
+  /// SecFlagFlat flag.
+  bool SkipFlatProf = false;
+
 public:
   SampleProfileReaderExtBinaryBase(std::unique_ptr<MemoryBuffer> B,
                                    LLVMContext &C, SampleProfileFormat Format)
@@ -689,6 +697,8 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
   virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override {
     return std::move(ProfSymList);
   };
+
+  virtual void setSkipFlatProf(bool Skip) override { SkipFlatProf = Skip; }
 };
 
 class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase {

diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h
index fc568f06ffc8..e72963ad5c74 100644
--- a/llvm/include/llvm/ProfileData/SampleProfWriter.h
+++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h
@@ -15,6 +15,7 @@
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/IR/ProfileSummary.h"
 #include "llvm/ProfileData/SampleProf.h"
 #include "llvm/Support/ErrorOr.h"
@@ -28,6 +29,15 @@
 namespace llvm {
 namespace sampleprof {
 
+enum SectionLayout {
+  DefaultLayout,
+  // The layout splits profile with context information from profile without
+  // context information. When Thinlto is enabled, ThinLTO postlink phase only
+  // has to load profile with context information and can skip the other part.
+  CtxSplitLayout,
+  NumOfLayout,
+};
+
 /// Sample-based profile writer. Base class.
 class SampleProfileWriter {
 public:
@@ -60,6 +70,7 @@ class SampleProfileWriter {
   virtual void setToCompressAllSections() {}
   virtual void setUseMD5() {}
   virtual void setPartialProfile() {}
+  virtual void resetSecLayout(SectionLayout SL) {}
 
 protected:
   SampleProfileWriter(std::unique_ptr<raw_ostream> &OS)
@@ -144,6 +155,36 @@ class SampleProfileWriterRawBinary : public SampleProfileWriterBinary {
   using SampleProfileWriterBinary::SampleProfileWriterBinary;
 };
 
+const std::array<SmallVector<SecHdrTableEntry, 8>, NumOfLayout>
+    ExtBinaryHdrLayoutTable = {
+        // Note that SecFuncOffsetTable section is written after SecLBRProfile
+        // in the profile, but is put before SecLBRProfile in SectionHdrLayout.
+        // This is because sample reader follows the order in SectionHdrLayout
+        // to read each section. To read function profiles on demand, sample
+        // reader need to get the offset of each function profile first.
+        //
+        // DefaultLayout
+        SmallVector<SecHdrTableEntry, 8>({{SecProfSummary},
+                                          {SecNameTable},
+                                          {SecFuncOffsetTable},
+                                          {SecLBRProfile},
+                                          {SecProfileSymbolList},
+                                          {SecFuncMetadata}}),
+        // CtxSplitLayout
+        SmallVector<SecHdrTableEntry, 8>({{SecProfSummary},
+                                          {SecNameTable},
+                                          // profile with context
+                                          // for next two sections
+                                          {SecFuncOffsetTable},
+                                          {SecLBRProfile},
+                                          // profile without context
+                                          // for next two sections
+                                          {SecFuncOffsetTable},
+                                          {SecLBRProfile},
+                                          {SecProfileSymbolList},
+                                          {SecFuncMetadata}}),
+};
+
 class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary {
   using SampleProfileWriterBinary::SampleProfileWriterBinary;
 public:
@@ -174,6 +215,19 @@ class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary {
     ProfSymList = PSL;
   };
 
+  virtual void resetSecLayout(SectionLayout SL) override {
+    verifySecLayout(SL);
+#ifndef NDEBUG
+    // Make sure resetSecLayout is called before any flag setting.
+    for (auto &Entry : SectionHdrLayout) {
+      assert(Entry.Flags == 0 &&
+             "resetSecLayout has to be called before any flag setting");
+    }
+#endif
+    SecLayout = SL;
+    SectionHdrLayout = ExtBinaryHdrLayoutTable[SL];
+  }
+
 protected:
   uint64_t markSectionStart(SecType Type, uint32_t LayoutIdx);
   std::error_code addNewSection(SecType Sec, uint32_t LayoutIdx,
@@ -185,11 +239,16 @@ class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary {
         addSecFlag(Entry, Flag);
     }
   }
+  template <class SecFlagType>
+  void addSectionFlag(uint32_t SectionIdx, SecFlagType Flag) {
+    addSecFlag(SectionHdrLayout[SectionIdx], Flag);
+  }
 
   // placeholder for subclasses to dispatch their own section writers.
   virtual std::error_code writeCustomSection(SecType Type) = 0;
+  // Verify the SecLayout is supported by the format.
+  virtual void verifySecLayout(SectionLayout SL) = 0;
 
-  virtual void initSectionHdrLayout() = 0;
   // specify the order to write sections.
   virtual std::error_code
   writeSections(const StringMap<FunctionSamples> &ProfileMap) = 0;
@@ -211,11 +270,13 @@ class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary {
   std::error_code writeFuncOffsetTable();
   std::error_code writeProfileSymbolListSection();
 
+  SectionLayout SecLayout = DefaultLayout;
   // Specifiy the order of sections in section header table. Note
   // the order of sections in SecHdrTable may be different that the
   // order in SectionHdrLayout. sample Reader will follow the order
   // in SectionHdrLayout to read each section.
-  SmallVector<SecHdrTableEntry, 8> SectionHdrLayout;
+  SmallVector<SecHdrTableEntry, 8> SectionHdrLayout =
+      ExtBinaryHdrLayoutTable[DefaultLayout];
 
   // Save the start of SecLBRProfile so we can compute the offset to the
   // start of SecLBRProfile for each Function's Profile and will keep it
@@ -261,33 +322,25 @@ class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary {
 class SampleProfileWriterExtBinary : public SampleProfileWriterExtBinaryBase {
 public:
   SampleProfileWriterExtBinary(std::unique_ptr<raw_ostream> &OS)
-      : SampleProfileWriterExtBinaryBase(OS) {
-    initSectionHdrLayout();
-  }
+      : SampleProfileWriterExtBinaryBase(OS) {}
 
 private:
-  virtual void initSectionHdrLayout() override {
-    // Note that SecFuncOffsetTable section is written after SecLBRProfile
-    // in the profile, but is put before SecLBRProfile in SectionHdrLayout.
-    //
-    // This is because sample reader follows the order of SectionHdrLayout to
-    // read each section, to read function profiles on demand sample reader
-    // need to get the offset of each function profile first.
-    //
-    // SecFuncOffsetTable section is written after SecLBRProfile in the
-    // profile because FuncOffsetTable needs to be populated while section
-    // SecLBRProfile is written.
-    SectionHdrLayout = {
-        {SecProfSummary, 0, 0, 0, 0},       {SecNameTable, 0, 0, 0, 0},
-        {SecFuncOffsetTable, 0, 0, 0, 0},   {SecLBRProfile, 0, 0, 0, 0},
-        {SecProfileSymbolList, 0, 0, 0, 0}, {SecFuncMetadata, 0, 0, 0, 0}};
-  };
+  std::error_code
+  writeDefaultLayout(const StringMap<FunctionSamples> &ProfileMap);
+  std::error_code
+  writeCtxSplitLayout(const StringMap<FunctionSamples> &ProfileMap);
+
   virtual std::error_code
   writeSections(const StringMap<FunctionSamples> &ProfileMap) override;
 
   virtual std::error_code writeCustomSection(SecType Type) override {
     return sampleprof_error::success;
   };
+
+  virtual void verifySecLayout(SectionLayout SL) override {
+    assert((SL == DefaultLayout || SL == CtxSplitLayout) &&
+           "Unsupported layout");
+  }
 };
 
 // CompactBinary is a compact format of binary profile which both reduces

diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index a8ffb378fb34..e8ac06d8637f 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -740,6 +740,10 @@ std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
     if (!Entry.Size)
       continue;
 
+    // Skip sections without context when SkipFlatProf is true.
+    if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
+      continue;
+
     const uint8_t *SecStart = BufStart + Entry.Offset;
     uint64_t SecSize = Entry.Size;
 
@@ -986,6 +990,9 @@ static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
   else
     Flags.append("{");
 
+  if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
+    Flags.append("flat,");
+
   switch (Entry.Type) {
   case SecNameTable:
     if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5))

diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index 60cfe505d19b..71dba6281f76 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -19,6 +19,7 @@
 
 #include "llvm/ProfileData/SampleProfWriter.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/ProfileData/ProfileCommon.h"
 #include "llvm/ProfileData/SampleProf.h"
 #include "llvm/Support/Compression.h"
@@ -264,7 +265,7 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
   return sampleprof_error::success;
 }
 
-std::error_code SampleProfileWriterExtBinary::writeSections(
+std::error_code SampleProfileWriterExtBinary::writeDefaultLayout(
     const StringMap<FunctionSamples> &ProfileMap) {
   // The const indices passed to writeOneSection below are specifying the
   // positions of the sections in SectionHdrLayout. Look at
@@ -285,6 +286,61 @@ std::error_code SampleProfileWriterExtBinary::writeSections(
   return sampleprof_error::success;
 }
 
+static void
+splitProfileMapToTwo(const StringMap<FunctionSamples> &ProfileMap,
+                     StringMap<FunctionSamples> &ContextProfileMap,
+                     StringMap<FunctionSamples> &NoContextProfileMap) {
+  for (const auto &I : ProfileMap) {
+    if (I.second.getCallsiteSamples().size())
+      ContextProfileMap.insert({I.first(), I.second});
+    else
+      NoContextProfileMap.insert({I.first(), I.second});
+  }
+}
+
+std::error_code SampleProfileWriterExtBinary::writeCtxSplitLayout(
+    const StringMap<FunctionSamples> &ProfileMap) {
+  StringMap<FunctionSamples> ContextProfileMap, NoContextProfileMap;
+  splitProfileMapToTwo(ProfileMap, ContextProfileMap, NoContextProfileMap);
+
+  if (auto EC = writeOneSection(SecProfSummary, 0, ProfileMap))
+    return EC;
+  if (auto EC = writeOneSection(SecNameTable, 1, ProfileMap))
+    return EC;
+  if (auto EC = writeOneSection(SecLBRProfile, 3, ContextProfileMap))
+    return EC;
+  if (auto EC = writeOneSection(SecFuncOffsetTable, 2, ContextProfileMap))
+    return EC;
+  // Mark the section to have no context. Note section flag needs to be set
+  // before writing the section.
+  addSectionFlag(5, SecCommonFlags::SecFlagFlat);
+  if (auto EC = writeOneSection(SecLBRProfile, 5, NoContextProfileMap))
+    return EC;
+  // Mark the section to have no context. Note section flag needs to be set
+  // before writing the section.
+  addSectionFlag(4, SecCommonFlags::SecFlagFlat);
+  if (auto EC = writeOneSection(SecFuncOffsetTable, 4, NoContextProfileMap))
+    return EC;
+  if (auto EC = writeOneSection(SecProfileSymbolList, 6, ProfileMap))
+    return EC;
+  if (auto EC = writeOneSection(SecFuncMetadata, 7, ProfileMap))
+    return EC;
+
+  return sampleprof_error::success;
+}
+
+std::error_code SampleProfileWriterExtBinary::writeSections(
+    const StringMap<FunctionSamples> &ProfileMap) {
+  std::error_code EC;
+  if (SecLayout == DefaultLayout)
+    EC = writeDefaultLayout(ProfileMap);
+  else if (SecLayout == CtxSplitLayout)
+    EC = writeCtxSplitLayout(ProfileMap);
+  else
+    llvm_unreachable("Unsupported layout");
+  return EC;
+}
+
 std::error_code SampleProfileWriterCompactBinary::write(
     const StringMap<FunctionSamples> &ProfileMap) {
   if (std::error_code EC = SampleProfileWriter::write(ProfileMap))

diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 30da7b6e218a..ef1ec9ca7b7a 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -1946,6 +1946,7 @@ bool SampleProfileLoader::doInitialization(Module &M,
     return false;
   }
   Reader = std::move(ReaderOrErr.get());
+  Reader->setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink);
   Reader->collectFuncsFrom(M);
   ProfileIsValid = (Reader->read() == sampleprof_error::success);
   PSL = Reader->getProfileSymbolList();
@@ -2111,7 +2112,10 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
       initialEntryCount = -1;
   }
 
-  F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
+  // Initialize entry count when the function has no existing entry
+  // count value.
+  if (!F.getEntryCount().hasValue())
+    F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
   std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
   if (AM) {
     auto &FAM =

diff --git a/llvm/test/Transforms/SampleProfile/Inputs/ctxsplit.extbinary.afdo b/llvm/test/Transforms/SampleProfile/Inputs/ctxsplit.extbinary.afdo
new file mode 100644
index 000000000000..8c27e21db1b4
Binary files /dev/null and b/llvm/test/Transforms/SampleProfile/Inputs/ctxsplit.extbinary.afdo differ

diff --git a/llvm/test/Transforms/SampleProfile/ctxsplit.ll b/llvm/test/Transforms/SampleProfile/ctxsplit.ll
new file mode 100644
index 000000000000..b97f73796f83
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/ctxsplit.ll
@@ -0,0 +1,59 @@
+; Check the nonflattened part of the ctxsplit profile will be read in thinlto
+; postlink phase while flattened part of the ctxsplit profile will not be read.
+; RUN: opt < %s -passes='thinlto<O2>' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/ctxsplit.extbinary.afdo -S | FileCheck %s --check-prefix=POSTLINK
+;
+; Check both the flattened and nonflattened parts of the ctxsplit profile will
+; be read in thinlto prelink phase.
+; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/ctxsplit.extbinary.afdo -S | FileCheck %s --check-prefix=PRELINK
+;
+; Check both the flattened and nonflattened parts of the ctxsplit profile will
+; be read in non-thinlto mode.
+; RUN: opt < %s -passes='default<O2>' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/ctxsplit.extbinary.afdo -S | FileCheck %s --check-prefix=NOTHINLTO
+
+; POSTLINK: define dso_local i32 @goo() {{.*}} !prof ![[ENTRY1:[0-9]+]] {
+; POSTLINK: define dso_local i32 @foo() {{.*}} !prof ![[ENTRY2:[0-9]+]] {
+; POSTLINK: ![[ENTRY1]] = !{!"function_entry_count", i64 1001}
+; POSTLINK: ![[ENTRY2]] = !{!"function_entry_count", i64 -1}
+; PRELINK: define dso_local i32 @goo() {{.*}} !prof ![[ENTRY1:[0-9]+]] {
+; PRELINK: define dso_local i32 @foo() {{.*}} !prof ![[ENTRY2:[0-9]+]] {
+; PRELINK: ![[ENTRY1]] = !{!"function_entry_count", i64 1001}
+; PRELINK: ![[ENTRY2]] = !{!"function_entry_count", i64 3001}
+; NOTHINLTO: define dso_local i32 @goo() {{.*}} !prof ![[ENTRY1:[0-9]+]] {
+; NOTHINLTO: define dso_local i32 @foo() {{.*}} !prof ![[ENTRY2:[0-9]+]] {
+; NOTHINLTO: ![[ENTRY1]] = !{!"function_entry_count", i64 1001}
+; NOTHINLTO: ![[ENTRY2]] = !{!"function_entry_count", i64 3001}
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define dso_local i32 @goo() #0 !dbg !10 {
+entry:
+  ret i32 -1, !dbg !11
+}
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define dso_local i32 @foo() #0 !dbg !7 {
+entry:
+  ret i32 -1, !dbg !9
+}
+
+attributes #0 = { "use-sample-profile" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 8.0.0 (trunk 345241)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, nameTableKind: None)
+!1 = !DIFile(filename: "a.c", directory: "")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{!"clang version 8.0.0 (trunk 345241)"}
+!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !2)
+!8 = !DISubroutineType(types: !2)
+!9 = !DILocation(line: 2, column: 3, scope: !7)
+!10 = distinct !DISubprogram(name: "goo", scope: !1, file: !1, line: 8, type: !8, isLocal: false, isDefinition: true, scopeLine: 8, isOptimized: true, unit: !0, retainedNodes: !2)
+!11 = !DILocation(line: 10, column: 3, scope: !10)
+


        


More information about the llvm-branch-commits mailing list