[llvm] [llvm-profdata] Enabled functionality to write split-layout profile (PR #101795)

William Junda Huang via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 28 16:51:49 PDT 2024


https://github.com/huangjd updated https://github.com/llvm/llvm-project/pull/101795

>From 5b28323c83d67de48d53cdcfda448980bcc03615 Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Sat, 3 Aug 2024 01:15:55 -0400
Subject: [PATCH 1/3] [llvm-profdata] Enabled functionality to write
 split-layout profile

Using the flag `-split_layout` in llvm-profdata merge, the output
profile can write profiles with and without inlined function into two
different extbinary sections (and their FuncOffsetTable too). The
section without inlined functions are marked with `SecFlagFlat` and is
skipped by ThinLTO because it provides no useful info.

The split layout feature was already implemented in SampleProfWriter but previously
there is no way from the tool to use it.
---
 llvm/docs/CommandGuide/llvm-profdata.rst          |   6 ++++++
 .../llvm-profdata/Inputs/split-layout.profdata    | Bin 0 -> 521 bytes
 .../tools/llvm-profdata/sample-split-layout.test  |   2 ++
 llvm/tools/llvm-profdata/llvm-profdata.cpp        |  12 ++++++++++++
 4 files changed, 20 insertions(+)
 create mode 100644 llvm/test/tools/llvm-profdata/Inputs/split-layout.profdata
 create mode 100644 llvm/test/tools/llvm-profdata/sample-split-layout.test

diff --git a/llvm/docs/CommandGuide/llvm-profdata.rst b/llvm/docs/CommandGuide/llvm-profdata.rst
index acf016a6dbcd70..166f694a2a6114 100644
--- a/llvm/docs/CommandGuide/llvm-profdata.rst
+++ b/llvm/docs/CommandGuide/llvm-profdata.rst
@@ -162,6 +162,12 @@ OPTIONS
  coverage for the optimized target. This option can only be used with
  sample-based profile in extbinary format.
 
+.. option:: --split_layout=[true|false]
+
+ Split the profile data section to two with one containing sample profiles with
+ inlined functions and the other not. This option can only be used with
+ sample-based profile in extbinary format.
+
 .. option:: --convert-sample-profile-layout=[nest|flat]
 
  Convert the merged profile into a profile with a new layout. Supported
diff --git a/llvm/test/tools/llvm-profdata/Inputs/split-layout.profdata b/llvm/test/tools/llvm-profdata/Inputs/split-layout.profdata
new file mode 100644
index 0000000000000000000000000000000000000000..bc473ae76558f16d2f46a5e35e4e62cad6bedd0a
GIT binary patch
literal 521
zcmZp9a$)0_lT%g%r*kks03(!!Q9 at 9G45$DTOae+*LFLt;d=@nM7f^X-sJH@}`~s-F
z2UHxUA7&2=l);LoA141As$UW+53>hm4JS+?Q~+iUF8Th{1p;%qxtRqf_$?4%T+qt~
zBoEBunIORQV7U;GY}hJsfuDK7A-TESEC<f3Z02Bla7PPBHoP);!Op(myV*1r?hk)0
zf#im5HbC;m9}sz`*%nA{YXOn_!6a*ZlyOpGQ6>YBPRq~FWXR0R$;?YNgwjR~xrv#1
z3=Axn=W?^NFic}%U||9htPGnu7+4vB1RKKw0S0!E1SbQ~Sq%HR8NahIaWOExU}t1x
xc)`xVz{S9Pfgi}ez|X+I$igs}o8jy{Mj#)^V0gsBINt`yegq6|U;r`l0svM#PiFuC

literal 0
HcmV?d00001

diff --git a/llvm/test/tools/llvm-profdata/sample-split-layout.test b/llvm/test/tools/llvm-profdata/sample-split-layout.test
new file mode 100644
index 00000000000000..60a194e1083624
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/sample-split-layout.test
@@ -0,0 +1,2 @@
+RUN: llvm-profdata merge --sample --extbinary --split_layout %p/Inputs/sample-profile.proftext -o %t-output
+RUN: diff %t-output %p/Inputs/split-layout.profdata
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 1f6c4c604d57b5..1f6efa788b1b8e 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -207,6 +207,12 @@ cl::opt<bool> GenPartialProfile(
     "gen-partial-profile", cl::init(false), cl::Hidden,
     cl::sub(MergeSubcommand),
     cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
+cl::opt<bool> SplitLayout(
+    "split_layout", cl::init(false), cl::Hidden,
+    cl::sub(MergeSubcommand),
+    cl::desc("Split the profile to two sections with one containing sample "
+             "profiles with inlined functions and the another not (only "
+             "meaningful for -extbinary)"));
 cl::opt<std::string> SupplInstrWithSample(
     "supplement-instr-with-sample", cl::init(""), cl::Hidden,
     cl::sub(MergeSubcommand),
@@ -1492,6 +1498,12 @@ static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer,
     else
       Writer.setPartialProfile();
   }
+  if (SplitLayout) {
+    if (OutputFormat != PF_Ext_Binary)
+      warn("-split-layout is ignored. Specify -extbinary to enable it");
+    else
+      Writer.resetSecLayout(SectionLayout::CtxSplitLayout);
+  }
 }
 
 static void mergeSampleProfile(const WeightedFileVector &Inputs,

>From a300e89a25354343340ef55395ee59c37860e861 Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Wed, 7 Aug 2024 22:34:00 -0400
Subject: [PATCH 2/3] Add CtxSplitLayout support for text mode (for manual
 inspection only) Fixed wordings refering to this flag, Previously it is
 described as "samples with/out context", which confuses with CSSPGO. Now they
 are called "samples/functions with/out inlined functions"

---
 llvm/docs/CommandGuide/llvm-profdata.rst      |  2 +-
 .../llvm/ProfileData/SampleProfWriter.h       | 28 ++++++++++++++-----
 llvm/lib/ProfileData/SampleProfReader.cpp     |  7 ++++-
 llvm/lib/ProfileData/SampleProfWriter.cpp     |  3 ++
 .../llvm-profdata/sample-split-layout.test    | 22 ++++++++++++++-
 llvm/tools/llvm-profdata/llvm-profdata.cpp    | 17 +++++------
 6 files changed, 61 insertions(+), 18 deletions(-)

diff --git a/llvm/docs/CommandGuide/llvm-profdata.rst b/llvm/docs/CommandGuide/llvm-profdata.rst
index 166f694a2a6114..af840f3994b3d6 100644
--- a/llvm/docs/CommandGuide/llvm-profdata.rst
+++ b/llvm/docs/CommandGuide/llvm-profdata.rst
@@ -162,7 +162,7 @@ OPTIONS
  coverage for the optimized target. This option can only be used with
  sample-based profile in extbinary format.
 
-.. option:: --split_layout=[true|false]
+.. option:: --split-layout=[true|false]
 
  Split the profile data section to two with one containing sample profiles with
  inlined functions and the other not. This option can only be used with
diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h
index 5398a44f13ba36..4b659eaf950b3e 100644
--- a/llvm/include/llvm/ProfileData/SampleProfWriter.h
+++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h
@@ -28,9 +28,9 @@ namespace sampleprof {
 
 enum SectionLayout {
   DefaultLayout,
-  // The layout splits profile with context information from profile without
-  // context information. When Thinlto is enabled, ThinLTO postlink phase only
-  // has to load profile with context information and can skip the other part.
+  // The layout splits profile with inlined functions from profile without
+  // inlined functions. When Thinlto is enabled, ThinLTO postlink phase only
+  // has to load profile with inlined functions and can skip the other part.
   CtxSplitLayout,
   NumOfLayout,
 };
@@ -128,7 +128,7 @@ class SampleProfileWriter {
   virtual void setToCompressAllSections() {}
   virtual void setUseMD5() {}
   virtual void setPartialProfile() {}
-  virtual void resetSecLayout(SectionLayout SL) {}
+  virtual void setUseCtxSplitLayout() {}
 
 protected:
   SampleProfileWriter(std::unique_ptr<raw_ostream> &OS)
@@ -176,12 +176,22 @@ class SampleProfileWriterText : public SampleProfileWriter {
     return sampleprof_error::success;
   }
 
+  void setUseCtxSplitLayout() override {
+    MarkFlatProfiles = true;
+  }
+
 private:
   /// Indent level to use when writing.
   ///
   /// This is used when printing inlined callees.
   unsigned Indent = 0;
 
+  /// If set, writes metadata "!Flat" to functions without inlined functions.
+  /// This flag is for manual inspection only, it has no effect for the profile
+  /// reader because a text sample profile is read sequentially and functions
+  /// cannot be skipped.
+  bool MarkFlatProfiles = false;
+
   friend ErrorOr<std::unique_ptr<SampleProfileWriter>>
   SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
                               SampleProfileFormat Format);
@@ -242,11 +252,11 @@ const std::array<SmallVector<SecHdrTableEntry, 8>, NumOfLayout>
         // CtxSplitLayout
         SmallVector<SecHdrTableEntry, 8>({{SecProfSummary, 0, 0, 0, 0},
                                           {SecNameTable, 0, 0, 0, 0},
-                                          // profile with context
+                                          // profile with inlined functions
                                           // for next two sections
                                           {SecFuncOffsetTable, 0, 0, 0, 0},
                                           {SecLBRProfile, 0, 0, 0, 0},
-                                          // profile without context
+                                          // profile without inlined functions
                                           // for next two sections
                                           {SecFuncOffsetTable, 0, 0, 0, 0},
                                           {SecLBRProfile, 0, 0, 0, 0},
@@ -283,7 +293,11 @@ class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary {
     ProfSymList = PSL;
   };
 
-  void resetSecLayout(SectionLayout SL) override {
+  void setUseCtxSplitLayout() override {
+    resetSecLayout(SectionLayout::CtxSplitLayout);
+  }
+
+  void resetSecLayout(SectionLayout SL) {
     verifySecLayout(SL);
 #ifndef NDEBUG
     // Make sure resetSecLayout is called before any flag setting.
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 4752465fc072e0..a62d46ef4554ec 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -223,6 +223,11 @@ static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
 
   if (Input[Depth] == '!') {
     LineTy = LineType::Metadata;
+    // This metadata is only for manual inspection only. We already created a
+    // FunctionSamples and put it in the profile map, so there is no point
+    // to skip profiles even they have no use for ThinLTO.
+    if (Input == StringRef(" !Flat"))
+      return true;
     return parseMetadata(Input.substr(Depth), FunctionHash, Attributes);
   }
 
@@ -996,7 +1001,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
     if (!Entry.Size)
       continue;
 
-    // Skip sections without context when SkipFlatProf is true.
+    // Skip sections without inlined functions when SkipFlatProf is true.
     if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
       continue;
 
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index 1630fefb4fcfba..8ded44fdc073b8 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -624,6 +624,9 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
     LineCount++;
   }
 
+  if (Indent == 0 && MarkFlatProfiles && S.getCallsiteSamples().size() == 0)
+    OS << " !Flat\n";
+
   return sampleprof_error::success;
 }
 
diff --git a/llvm/test/tools/llvm-profdata/sample-split-layout.test b/llvm/test/tools/llvm-profdata/sample-split-layout.test
index 60a194e1083624..a1927c80b2e29b 100644
--- a/llvm/test/tools/llvm-profdata/sample-split-layout.test
+++ b/llvm/test/tools/llvm-profdata/sample-split-layout.test
@@ -1,2 +1,22 @@
-RUN: llvm-profdata merge --sample --extbinary --split_layout %p/Inputs/sample-profile.proftext -o %t-output
+RUN: llvm-profdata merge --sample --extbinary --split-layout %p/Inputs/sample-profile.proftext -o %t-output
 RUN: diff %t-output %p/Inputs/split-layout.profdata
+
+RUN: llvm-profdata merge --sample --text --split-layout %t-output | FileCheck %s
+CHECK: main:184019:0
+CHECK-NEXT:  4: 534
+CHECK-NEXT:  4.2: 534
+CHECK-NEXT:  5: 1075
+CHECK-NEXT:  5.1: 1075
+CHECK-NEXT:  6: 2080
+CHECK-NEXT:  7: 534
+CHECK-NEXT:  9: 2064 _Z3bari:1471 _Z3fooi:631
+CHECK-NEXT:  10: inline1:1000
+CHECK-NEXT:   1: 1000
+CHECK-NEXT:  10: inline2:2000
+CHECK-NEXT:   1: 2000
+CHECK-NEXT: _Z3bari:20301:1437
+CHECK-NEXT:  1: 1437
+CHECK-NEXT:  !Flat
+CHECK-NEXT: _Z3fooi:7711:610
+CHECK-NEXT:  1: 610
+CHECK-NEXT:  !Flat
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 1f6efa788b1b8e..3b775de11e0405 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -208,10 +208,10 @@ cl::opt<bool> GenPartialProfile(
     cl::sub(MergeSubcommand),
     cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
 cl::opt<bool> SplitLayout(
-    "split_layout", cl::init(false), cl::Hidden,
+    "split-layout", cl::init(false), cl::Hidden,
     cl::sub(MergeSubcommand),
     cl::desc("Split the profile to two sections with one containing sample "
-             "profiles with inlined functions and the another not (only "
+             "profiles with inlined functions and the other without (only "
              "meaningful for -extbinary)"));
 cl::opt<std::string> SupplInstrWithSample(
     "supplement-instr-with-sample", cl::init(""), cl::Hidden,
@@ -1473,6 +1473,13 @@ static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer,
                                   sampleprof::ProfileSymbolList &WriterList,
                                   bool CompressAllSections, bool UseMD5,
                                   bool GenPartialProfile) {
+  if (SplitLayout) {
+    if (OutputFormat == PF_Binary)
+      warn("-split-layout is ignored. Specify -extbinary to enable it");
+    else
+      Writer.setUseCtxSplitLayout();
+  }
+
   populateProfileSymbolList(Buffer, WriterList);
   if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary)
     warn("Profile Symbol list is not empty but the output format is not "
@@ -1498,12 +1505,6 @@ static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer,
     else
       Writer.setPartialProfile();
   }
-  if (SplitLayout) {
-    if (OutputFormat != PF_Ext_Binary)
-      warn("-split-layout is ignored. Specify -extbinary to enable it");
-    else
-      Writer.resetSecLayout(SectionLayout::CtxSplitLayout);
-  }
 }
 
 static void mergeSampleProfile(const WeightedFileVector &Inputs,

>From 44c28cf6b4c01a7839410e72c2ce2c9143344413 Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Wed, 28 Aug 2024 19:51:36 -0400
Subject: [PATCH 3/3] Enforce skipflatprof for text format sample profile

---
 .../llvm/ProfileData/SampleProfReader.h       | 16 +++++-------
 llvm/lib/ProfileData/SampleProfReader.cpp     | 26 ++++++++++++++++---
 2 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index f4bdc6525308d2..246cc9c7caa482 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -484,9 +484,9 @@ class SampleProfileReader {
   /// are present.
   virtual void setProfileUseMD5() { ProfileIsMD5 = true; }
 
-  /// Don't read profile without context if the flag is set. This is only meaningful
-  /// for ExtBinary format.
-  virtual void setSkipFlatProf(bool Skip) {}
+  /// Don't read profile without context if the flag is set.
+  void setSkipFlatProf(bool Skip) { SkipFlatProf = Skip; }
+
   /// Return whether any name in the profile contains ".__uniq." suffix.
   virtual bool hasUniqSuffix() { return false; }
 
@@ -552,6 +552,10 @@ class SampleProfileReader {
   /// Whether the profile uses MD5 for Sample Contexts and function names. This
   /// can be one-way overriden by the user to force use MD5.
   bool ProfileIsMD5 = false;
+
+  /// If SkipFlatProf is true, skip functions marked with !Flat in text mode or
+  /// sections with SecFlagFlat flag in ExtBinary mode.
+  bool SkipFlatProf = false;
 };
 
 class SampleProfileReaderText : public SampleProfileReader {
@@ -754,10 +758,6 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
   /// The set containing the functions to use when compiling a module.
   DenseSet<StringRef> FuncsToUse;
 
-  /// If SkipFlatProf is true, skip the sections with
-  /// SecFlagFlat flag.
-  bool SkipFlatProf = false;
-
 public:
   SampleProfileReaderExtBinaryBase(std::unique_ptr<MemoryBuffer> B,
                                    LLVMContext &C, SampleProfileFormat Format)
@@ -779,8 +779,6 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
   std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override {
     return std::move(ProfSymList);
   };
-
-  void setSkipFlatProf(bool Skip) override { SkipFlatProf = Skip; }
 };
 
 class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase {
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index a62d46ef4554ec..b9e39e914c8079 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -215,7 +215,8 @@ static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
                       uint64_t &NumSamples, uint32_t &LineOffset,
                       uint32_t &Discriminator, StringRef &CalleeName,
                       DenseMap<StringRef, uint64_t> &TargetCountMap,
-                      uint64_t &FunctionHash, uint32_t &Attributes) {
+                      uint64_t &FunctionHash, uint32_t &Attributes,
+                      bool &IsFlat) {
   for (Depth = 0; Input[Depth] == ' '; Depth++)
     ;
   if (Depth == 0)
@@ -226,8 +227,10 @@ static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
     // This metadata is only for manual inspection only. We already created a
     // FunctionSamples and put it in the profile map, so there is no point
     // to skip profiles even they have no use for ThinLTO.
-    if (Input == StringRef(" !Flat"))
+    if (Input == StringRef(" !Flat")) {
+      IsFlat = true;
       return true;
+    }
     return parseMetadata(Input.substr(Depth), FunctionHash, Attributes);
   }
 
@@ -330,6 +333,8 @@ std::error_code SampleProfileReaderText::readImpl() {
   // top-level or nested function profile.
   uint32_t DepthMetadata = 0;
 
+  std::vector<SampleContext *> FlatSamples;
+
   ProfileIsFS = ProfileIsFSDisciminator;
   FunctionSamples::ProfileIsFS = ProfileIsFS;
   for (; !LineIt.is_at_eof(); ++LineIt) {
@@ -373,9 +378,10 @@ std::error_code SampleProfileReaderText::readImpl() {
       LineType LineTy;
       uint64_t FunctionHash = 0;
       uint32_t Attributes = 0;
+      bool IsFlat = false;
       if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset,
                      Discriminator, FName, TargetCountMap, FunctionHash,
-                     Attributes)) {
+                     Attributes, IsFlat)) {
         reportError(LineIt.line_number(),
                     "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
                         *LineIt);
@@ -431,12 +437,26 @@ std::error_code SampleProfileReaderText::readImpl() {
         if (Attributes & (uint32_t)ContextShouldBeInlined)
           ProfileIsPreInlined = true;
         DepthMetadata = Depth;
+        if (IsFlat) {
+          if (Depth == 1)
+            FlatSamples.push_back(&FProfile.getContext());
+          else
+            Ctx.diagnose(DiagnosticInfoSampleProfile(
+                Buffer->getBufferIdentifier(), LineIt.line_number(),
+                "!Flat may only be used at top level function.", DS_Warning));
+        }
         break;
       }
       }
     }
   }
 
+  // Honor the option to skip flat functions. Since they are already added to
+  // the profile map, remove them all here.
+  if (SkipFlatProf)
+    for (SampleContext *FlatSample : FlatSamples)
+      Profiles.erase(*FlatSample);
+
   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
          "Cannot have both context-sensitive and regular profile");
   ProfileIsCS = (CSProfileCount > 0);



More information about the llvm-commits mailing list