[llvm] 6c27c61 - [PGO] Improve the working set size heuristics under the partial sample PGO.

Hiroshi Yamauchi via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 1 10:29:36 PDT 2020


Author: Hiroshi Yamauchi
Date: 2020-06-01T10:29:23-07:00
New Revision: 6c27c61d32fd2951a290c6d4363bd495f6feae96

URL: https://github.com/llvm/llvm-project/commit/6c27c61d32fd2951a290c6d4363bd495f6feae96
DIFF: https://github.com/llvm/llvm-project/commit/6c27c61d32fd2951a290c6d4363bd495f6feae96.diff

LOG: [PGO] Improve the working set size heuristics under the partial sample PGO.

Summary:
The working set size heuristics (ProfileSummaryInfo::hasHugeWorkingSetSize)
under the partial sample PGO may not be accurate because the profile is partial
and the number of hot profile counters in the ProfileSummary may not reflect the
actual working set size of the program being compiled.

To improve this, the (approximated) ratio of the number of profile counters
of the program being compiled to the number of profile counters in the partial
sample profile is computed (which is called the partial profile ratio) and the
working set size of the profile is scaled by this ratio to reflect the working
set size of the program being compiled and used for the working set size
heuristics.

The partial profile ratio is approximated based on the number of the basic
blocks in the program and the NumCounts field in the ProfileSummary and computed
through the thin LTO indexing. This means that there is the limitation that the
scaled working set size is available to the thin LTO post link passes only.

Reviewers: davidxl

Subscribers: mgorny, eraman, hiraditya, steven_wu, dexonsmith, arphaman, dang, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D79831

Added: 
    

Modified: 
    llvm/include/llvm/IR/Module.h
    llvm/include/llvm/IR/ProfileSummary.h
    llvm/lib/Analysis/ProfileSummaryInfo.cpp
    llvm/lib/IR/Module.cpp
    llvm/lib/LTO/LTOBackend.cpp
    llvm/lib/Transforms/IPO/FunctionImport.cpp
    llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp
    llvm/unittests/IR/ModuleTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h
index ead003007904..36d58661ae4c 100644
--- a/llvm/include/llvm/IR/Module.h
+++ b/llvm/include/llvm/IR/Module.h
@@ -46,6 +46,7 @@ class FunctionType;
 class GVMaterializer;
 class LLVMContext;
 class MemoryBuffer;
+class ModuleSummaryIndex;
 class Pass;
 class RandomNumberGenerator;
 template <class PtrType> class SmallPtrSetImpl;
@@ -882,6 +883,10 @@ class Module {
 
   /// Take ownership of the given memory buffer.
   void setOwnedMemoryBuffer(std::unique_ptr<MemoryBuffer> MB);
+
+  /// Set the partial sample profile ratio in the profile summary module flag,
+  /// if applicable.
+  void setPartialSampleProfileRatio(const ModuleSummaryIndex &Index);
 };
 
 /// Given "llvm.used" or "llvm.compiler.used" as a global name, collect

diff  --git a/llvm/include/llvm/IR/ProfileSummary.h b/llvm/include/llvm/IR/ProfileSummary.h
index 00af0c5e05c8..889568e7946b 100644
--- a/llvm/include/llvm/IR/ProfileSummary.h
+++ b/llvm/include/llvm/IR/ProfileSummary.h
@@ -59,7 +59,8 @@ class ProfileSummary {
   bool Partial = false;
   /// This approximately represents the ratio of the number of profile counters
   /// of the program being built to the number of profile counters in the
-  /// partial sample profile. When 'Partial' is false, it is undefined.
+  /// partial sample profile. When 'Partial' is false, it is undefined. This is
+  /// currently only available under thin LTO mode.
   double PartialProfileRatio = 0;
   /// Return detailed summary as metadata.
   Metadata *getDetailedSummaryMD(LLVMContext &Context);

diff  --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index 3360fd4c37c0..e3a76a6d075e 100644
--- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -70,6 +70,23 @@ static cl::opt<bool> PartialProfile(
     "partial-profile", cl::Hidden, cl::init(false),
     cl::desc("Specify the current profile is used as a partial profile."));
 
+cl::opt<bool> ScalePartialSampleProfileWorkingSetSize(
+    "scale-partial-sample-profile-working-set-size", cl::Hidden,
+    cl::init(false),
+    cl::desc(
+        "If true, scale the working set size of the partial sample profile "
+        "by the partial profile ratio to reflect the size of the program "
+        "being compiled."));
+
+static cl::opt<double> PartialSampleProfileWorkingSetSizeScaleFactor(
+    "partial-sample-profile-working-set-size-scale-factor", cl::Hidden,
+    cl::init(0.008),
+    cl::desc("The scale factor used to scale the working set size of the "
+             "partial sample profile along with the partial profile ratio. "
+             "This includes the factor of the profile counter per block "
+             "and the factor to scale the working set size to use the same "
+             "shared thresholds as PGO."));
+
 // Find the summary entry for a desired percentile of counts.
 static const ProfileSummaryEntry &getEntryForPercentile(SummaryEntryVector &DS,
                                                         uint64_t Percentile) {
@@ -280,10 +297,23 @@ void ProfileSummaryInfo::computeThresholds() {
     ColdCountThreshold = ProfileSummaryColdCount;
   assert(ColdCountThreshold <= HotCountThreshold &&
          "Cold count threshold cannot exceed hot count threshold!");
-  HasHugeWorkingSetSize =
-      HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold;
-  HasLargeWorkingSetSize =
-      HotEntry.NumCounts > ProfileSummaryLargeWorkingSetSizeThreshold;
+  if (!hasPartialSampleProfile() || !ScalePartialSampleProfileWorkingSetSize) {
+    HasHugeWorkingSetSize =
+        HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold;
+    HasLargeWorkingSetSize =
+        HotEntry.NumCounts > ProfileSummaryLargeWorkingSetSizeThreshold;
+  } else {
+    // Scale the working set size of the partial sample profile to reflect the
+    // size of the program being compiled.
+    double PartialProfileRatio = Summary->getPartialProfileRatio();
+    uint64_t ScaledHotEntryNumCounts =
+        static_cast<uint64_t>(HotEntry.NumCounts * PartialProfileRatio *
+                              PartialSampleProfileWorkingSetSizeScaleFactor);
+    HasHugeWorkingSetSize =
+        ScaledHotEntryNumCounts > ProfileSummaryHugeWorkingSetSizeThreshold;
+    HasLargeWorkingSetSize =
+        ScaledHotEntryNumCounts > ProfileSummaryLargeWorkingSetSizeThreshold;
+  }
 }
 
 Optional<uint64_t>

diff  --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp
index 1416cdce9974..3ea181a9b48d 100644
--- a/llvm/lib/IR/Module.cpp
+++ b/llvm/lib/IR/Module.cpp
@@ -33,6 +33,7 @@
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
 #include "llvm/IR/SymbolTableListTraits.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/TypeFinder.h"
@@ -673,3 +674,23 @@ GlobalVariable *llvm::collectUsedGlobalVariables(
   }
   return GV;
 }
+
+void Module::setPartialSampleProfileRatio(const ModuleSummaryIndex &Index) {
+  if (auto *SummaryMD = getProfileSummary(/*IsCS*/ false)) {
+    std::unique_ptr<ProfileSummary> ProfileSummary(
+        ProfileSummary::getFromMD(SummaryMD));
+    if (ProfileSummary) {
+      if (ProfileSummary->getKind() != ProfileSummary::PSK_Sample ||
+          !ProfileSummary->isPartialProfile())
+        return;
+      uint64_t BlockCount = Index.getBlockCount();
+      uint32_t NumCounts = ProfileSummary->getNumCounts();
+      if (!NumCounts)
+        return;
+      double Ratio = (double)BlockCount / NumCounts;
+      ProfileSummary->setPartialProfileRatio(Ratio);
+      setProfileSummary(ProfileSummary->getMD(getContext()),
+                        ProfileSummary::PSK_Sample);
+    }
+  }
+}

diff  --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 22019e465ac1..79c528176f25 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -541,6 +541,10 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
     return DiagFileOrErr.takeError();
   auto DiagnosticOutputFile = std::move(*DiagFileOrErr);
 
+  // Set the partial sample profile ratio in the profile summary module flag of
+  // the module, if applicable.
+  Mod.setPartialSampleProfileRatio(CombinedIndex);
+
   if (Conf.CodeGenOnly) {
     codegen(Conf, TM.get(), AddStream, Task, Mod);
     return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));

diff  --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index a73ba84696e7..468bf19f2e48 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -1232,6 +1232,12 @@ Expected<bool> FunctionImporter::importFunctions(
     // have loaded all the required metadata!
     UpgradeDebugInfo(*SrcModule);
 
+    // Set the partial sample profile ratio in the profile summary module flag
+    // of the imported source module, if applicable, so that the profile summary
+    // module flag will match with that of the destination module when it's
+    // imported.
+    SrcModule->setPartialSampleProfileRatio(Index);
+
     // Link in the specified functions.
     if (renameModuleForThinLTO(*SrcModule, Index, ClearDSOLocalOnDeclarations,
                                &GlobalsToImport))

diff  --git a/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp b/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp
index ae60c4143548..cbd2236e4cb3 100644
--- a/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp
+++ b/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp
@@ -23,6 +23,8 @@
 #include "llvm/Support/raw_ostream.h"
 #include "gtest/gtest.h"
 
+extern llvm::cl::opt<bool> ScalePartialSampleProfileWorkingSetSize;
+
 namespace llvm {
 namespace {
 
@@ -42,7 +44,12 @@ class ProfileSummaryInfoTest : public testing::Test {
     BPI.reset(new BranchProbabilityInfo(F, *LI));
     return BlockFrequencyInfo(F, *BPI, *LI);
   }
-  std::unique_ptr<Module> makeLLVMModule(const char *ProfKind = nullptr) {
+  std::unique_ptr<Module> makeLLVMModule(const char *ProfKind = nullptr,
+                                         uint64_t NumCounts = 3,
+                                         uint64_t IsPartialProfile = 0,
+                                         double PartialProfileRatio = 0.0,
+                                         uint64_t HotNumCounts = 3,
+                                         uint64_t ColdNumCounts = 10) {
     const char *ModuleString =
         "define i32 @g(i32 %x) !prof !21 {{\n"
         "  ret i32 0\n"
@@ -83,27 +90,32 @@ class ProfileSummaryInfoTest : public testing::Test {
         "!22 = !{{!\"function_entry_count\", i64 100}\n"
         "!23 = !{{!\"branch_weights\", i32 64, i32 4}\n"
         "{0}";
-    const char *SummaryString = "!llvm.module.flags = !{{!1}"
-                                "!1 = !{{i32 1, !\"ProfileSummary\", !2}"
-                                "!2 = !{{!3, !4, !5, !6, !7, !8, !9, !10}"
-                                "!3 = !{{!\"ProfileFormat\", !\"{0}\"}"
-                                "!4 = !{{!\"TotalCount\", i64 10000}"
-                                "!5 = !{{!\"MaxCount\", i64 10}"
-                                "!6 = !{{!\"MaxInternalCount\", i64 1}"
-                                "!7 = !{{!\"MaxFunctionCount\", i64 1000}"
-                                "!8 = !{{!\"NumCounts\", i64 3}"
-                                "!9 = !{{!\"NumFunctions\", i64 3}"
-                                "!10 = !{{!\"DetailedSummary\", !11}"
-                                "!11 = !{{!12, !13, !14}"
-                                "!12 = !{{i32 10000, i64 1000, i32 1}"
-                                "!13 = !{{i32 999000, i64 300, i32 3}"
-                                "!14 = !{{i32 999999, i64 5, i32 10}";
+    const char *SummaryString =
+        "!llvm.module.flags = !{{!1}\n"
+        "!1 = !{{i32 1, !\"ProfileSummary\", !2}\n"
+        "!2 = !{{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12}\n"
+        "!3 = !{{!\"ProfileFormat\", !\"{0}\"}\n"
+        "!4 = !{{!\"TotalCount\", i64 10000}\n"
+        "!5 = !{{!\"MaxCount\", i64 10}\n"
+        "!6 = !{{!\"MaxInternalCount\", i64 1}\n"
+        "!7 = !{{!\"MaxFunctionCount\", i64 1000}\n"
+        "!8 = !{{!\"NumCounts\", i64 {1}}\n"
+        "!9 = !{{!\"NumFunctions\", i64 3}\n"
+        "!10 = !{{!\"IsPartialProfile\", i64 {2}}\n"
+        "!11 = !{{!\"PartialProfileRatio\", double {3}}\n"
+        "!12 = !{{!\"DetailedSummary\", !13}\n"
+        "!13 = !{{!14, !15, !16}\n"
+        "!14 = !{{i32 10000, i64 1000, i32 1}\n"
+        "!15 = !{{i32 990000, i64 300, i32 {4}}\n"
+        "!16 = !{{i32 999999, i64 5, i32 {5}}\n";
     SMDiagnostic Err;
-    if (ProfKind)
-      return parseAssemblyString(
-          formatv(ModuleString, formatv(SummaryString, ProfKind).str()).str(),
-          Err, C);
-    else
+    if (ProfKind) {
+      auto Summary =
+          formatv(SummaryString, ProfKind, NumCounts, IsPartialProfile,
+                  PartialProfileRatio, HotNumCounts, ColdNumCounts)
+              .str();
+      return parseAssemblyString(formatv(ModuleString, Summary).str(), Err, C);
+    } else
       return parseAssemblyString(formatv(ModuleString, "").str(), Err, C);
   }
 };
@@ -280,6 +292,7 @@ TEST_F(ProfileSummaryInfoTest, SampleProf) {
   ProfileSummaryInfo PSI = buildPSI(M.get());
   EXPECT_TRUE(PSI.hasProfileSummary());
   EXPECT_TRUE(PSI.hasSampleProfile());
+  EXPECT_FALSE(PSI.hasPartialSampleProfile());
 
   BasicBlock &BB0 = F->getEntryBlock();
   BasicBlock *BB1 = BB0.getTerminator()->getSuccessor(0);
@@ -373,5 +386,47 @@ TEST_F(ProfileSummaryInfoTest, SampleProfNoFuncEntryCount) {
   EXPECT_FALSE(PSI.isFunctionColdInCallGraphNthPercentile(990000, F, BFI));
 }
 
+TEST_F(ProfileSummaryInfoTest, PartialSampleProfWorkingSetSize) {
+  ScalePartialSampleProfileWorkingSetSize.setValue(true);
+
+  // With PartialProfileRatio unset (zero.)
+  auto M1 = makeLLVMModule("SampleProfile", /*NumCounts*/ 3,
+                           /*IsPartialProfile*/ 1,
+                           /*PartialProfileRatio*/ 0.0,
+                           /*HotNumCounts*/ 3, /*ColdNumCounts*/ 10);
+  ProfileSummaryInfo PSI1 = buildPSI(M1.get());
+  EXPECT_TRUE(PSI1.hasProfileSummary());
+  EXPECT_TRUE(PSI1.hasSampleProfile());
+  EXPECT_TRUE(PSI1.hasPartialSampleProfile());
+  EXPECT_FALSE(PSI1.hasHugeWorkingSetSize());
+  EXPECT_FALSE(PSI1.hasLargeWorkingSetSize());
+
+  // With PartialProfileRatio set (non-zero) and a small working set size.
+  auto M2 = makeLLVMModule("SampleProfile", /*NumCounts*/ 27493235,
+                           /*IsPartialProfile*/ 1,
+                           /*PartialProfileRatio*/ 0.00000012,
+                           /*HotNumCounts*/ 3102082,
+                           /*ColdNumCounts*/ 18306149);
+  ProfileSummaryInfo PSI2 = buildPSI(M2.get());
+  EXPECT_TRUE(PSI2.hasProfileSummary());
+  EXPECT_TRUE(PSI2.hasSampleProfile());
+  EXPECT_TRUE(PSI2.hasPartialSampleProfile());
+  EXPECT_FALSE(PSI2.hasHugeWorkingSetSize());
+  EXPECT_FALSE(PSI2.hasLargeWorkingSetSize());
+
+  // With PartialProfileRatio set (non-zero) and a large working set size.
+  auto M3 = makeLLVMModule("SampleProfile", /*NumCounts*/ 27493235,
+                           /*IsPartialProfile*/ 1,
+                           /*PartialProfileRatio*/ 0.9,
+                           /*HotNumCounts*/ 3102082,
+                           /*ColdNumCounts*/ 18306149);
+  ProfileSummaryInfo PSI3 = buildPSI(M3.get());
+  EXPECT_TRUE(PSI3.hasProfileSummary());
+  EXPECT_TRUE(PSI3.hasSampleProfile());
+  EXPECT_TRUE(PSI3.hasPartialSampleProfile());
+  EXPECT_TRUE(PSI3.hasHugeWorkingSetSize());
+  EXPECT_TRUE(PSI3.hasLargeWorkingSetSize());
+}
+
 } // end anonymous namespace
 } // end namespace llvm

diff  --git a/llvm/unittests/IR/ModuleTest.cpp b/llvm/unittests/IR/ModuleTest.cpp
index 7b34d5d0ee55..67338f797d3a 100644
--- a/llvm/unittests/IR/ModuleTest.cpp
+++ b/llvm/unittests/IR/ModuleTest.cpp
@@ -9,6 +9,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/RandomNumberGenerator.h"
 #include "gtest/gtest.h"
@@ -121,4 +122,40 @@ TEST(ModuleTest, setProfileSummary) {
   delete PS;
 }
 
+TEST(ModuleTest, setPartialSampleProfileRatio) {
+  const char *IRString = R"IR(
+  !llvm.module.flags = !{!0}
+
+  !0 = !{i32 1, !"ProfileSummary", !1}
+  !1 = !{!2, !3, !4, !5, !6, !7, !8, !9, !10, !11}
+  !2 = !{!"ProfileFormat", !"SampleProfile"}
+  !3 = !{!"TotalCount", i64 10000}
+  !4 = !{!"MaxCount", i64 10}
+  !5 = !{!"MaxInternalCount", i64 1}
+  !6 = !{!"MaxFunctionCount", i64 1000}
+  !7 = !{!"NumCounts", i64 200}
+  !8 = !{!"NumFunctions", i64 3}
+  !9 = !{!"IsPartialProfile", i64 1}
+  !10 = !{!"PartialProfileRatio", double 0.0}
+  !11 = !{!"DetailedSummary", !12}
+  !12 = !{!13, !14, !15}
+  !13 = !{i32 10000, i64 1000, i32 1}
+  !14 = !{i32 990000, i64 300, i32 10}
+  !15 = !{i32 999999, i64 5, i32 100}
+  )IR";
+
+  SMDiagnostic Err;
+  LLVMContext Context;
+  std::unique_ptr<Module> M = parseAssemblyString(IRString, Err, Context);
+  ModuleSummaryIndex Index(/*HaveGVs*/ false);
+  const unsigned BlockCount = 100;
+  const unsigned NumCounts = 200;
+  Index.setBlockCount(BlockCount);
+  M->setPartialSampleProfileRatio(Index);
+  double Ratio = (double)BlockCount / NumCounts;
+  std::unique_ptr<ProfileSummary> ProfileSummary(
+      ProfileSummary::getFromMD(M->getProfileSummary(/*IsCS*/ false)));
+  EXPECT_EQ(Ratio, ProfileSummary->getPartialProfileRatio());
+}
+
 } // end namespace


        


More information about the llvm-commits mailing list