[llvm] [CSSPGO] Error out if the checksum mismatch is high (PR #84097)

Lei Wang via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 26 16:34:19 PDT 2024


https://github.com/wlei-llvm updated https://github.com/llvm/llvm-project/pull/84097

>From 90dea0e611f466dc479f70cd37e2bf7618fef9e9 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Thu, 29 Feb 2024 18:04:37 -0800
Subject: [PATCH 1/7] [CSSPGO] Error out if the checksum mismatch is high

---
 llvm/lib/Transforms/IPO/SampleProfile.cpp | 81 +++++++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 9a8040bc4b064e..e396945ad83e75 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -234,6 +234,24 @@ static cl::opt<unsigned> ProfileICPRelativeHotnessSkip(
     cl::desc(
         "Skip relative hotness check for ICP up to given number of targets."));
 
+static cl::opt<unsigned> ChecksumMismatchFuncHotBlockSkip(
+    "checksum-mismatch-func-hot-block-skip", cl::Hidden, cl::init(100),
+    cl::desc("For checksum-mismatch error check, skip checking the function "
+             "whose num of hot(on average) blocks is smaller than the "
+             "given number."));
+
+static cl::opt<unsigned> ChecksumMismatchNumFuncSkip(
+    "checksum-mismatch-num-func-skip", cl::Hidden, cl::init(50),
+    cl::desc("For checksum-mismatch error check, skip the check if the total "
+             "number of selected function is smaller than the given number."));
+
+static cl::opt<unsigned> ChecksumMismatchErrorThreshold(
+    "checksum-mismatch-error-threshold", cl::Hidden, cl::init(80),
+    cl::desc(
+        "For checksum-mismatch error check, error out if the percentage of "
+        "function mismatched-checksum is higher than the given percentage "
+        "threshold"));
+
 static cl::opt<bool> CallsitePrioritizedInline(
     "sample-profile-prioritized-inline", cl::Hidden,
 
@@ -630,6 +648,8 @@ class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {
   std::vector<Function *> buildFunctionOrder(Module &M, LazyCallGraph &CG);
   std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(Module &M);
   void generateMDProfMetadata(Function &F);
+  bool errorIfHighChecksumMismatch(Module &M, ProfileSummaryInfo *PSI,
+                                   const SampleProfileMap &Profiles);
 
   /// Map from function name to Function *. Used to find the function from
   /// the function name. If the function name contains suffix, additional
@@ -2187,6 +2207,61 @@ bool SampleProfileLoader::doInitialization(Module &M,
   return true;
 }
 
+// Note that this is a module-level check. Even if one module is errored out,
+// the entire build will be errored out. However, the user could make big
+// changes to functions in single module but those changes might not be
+// performance significant to the whole binary. Therefore, we use a conservative
+// approach to make sure we only error out if it globally impacts the binary
+// performance. To achieve this, we use heuristics to select a reasonable
+// big set of functions that are supposed to be globally performance
+// significant, only compute and check the mismatch within those functions. The
+// function selection is based on two criteria: 1) The function is "hot" enough,
+// which is tuned by a hotness-based flag(ChecksumMismatchFuncHotBlockSkip). 2)
+// The num of function is large enough which is tuned by the
+// ChecksumMismatchNumFuncSkip flag.
+bool SampleProfileLoader::errorIfHighChecksumMismatch(
+    Module &M, ProfileSummaryInfo *PSI, const SampleProfileMap &Profiles) {
+  assert(FunctionSamples::ProfileIsProbeBased &&
+         "Only support for probe-based profile");
+  uint64_t TotalSelectedFunc = 0;
+  uint64_t NumMismatchedFunc = 0;
+  for (const auto &I : Profiles) {
+    const auto &FS = I.second;
+    const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
+    if (!FuncDesc)
+      continue;
+
+    // We want to select a set of functions that are globally performance
+    // significant, in other words, if those functions profiles are
+    // checksum-mismatched and dropped, the whole binary will likely be
+    // impacted, so here we use a hotness-based threshold to control the
+    // selection.
+    if (FS.getTotalSamples() <
+        ChecksumMismatchFuncHotBlockSkip * PSI->getOrCompHotCountThreshold())
+      continue;
+
+    TotalSelectedFunc++;
+    if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS))
+      NumMismatchedFunc++;
+  }
+  // Make sure that the num of selected function is not too small to distinguish
+  // from the user's benign changes.
+  if (TotalSelectedFunc < ChecksumMismatchNumFuncSkip)
+    return false;
+
+  // Finally check the mismatch percentage against the threshold.
+  if (NumMismatchedFunc * 100 >=
+      TotalSelectedFunc * ChecksumMismatchErrorThreshold) {
+    auto &Ctx = M.getContext();
+    const char *Msg =
+        "The FDO profile is too old and will cause big performance regression, "
+        "please drop the profile and collect a new one.";
+    Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg));
+    return true;
+  }
+  return false;
+}
+
 void SampleProfileMatcher::findIRAnchors(
     const Function &F, std::map<LineLocation, StringRef> &IRAnchors) {
   // For inlined code, recover the original callsite and callee by finding the
@@ -2718,6 +2793,12 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
                         ProfileSummary::PSK_Sample);
     PSI->refresh();
   }
+
+  // Error out if the profile checksum mismatch is too high.
+  if (FunctionSamples::ProfileIsProbeBased &&
+      errorIfHighChecksumMismatch(M, PSI, Reader->getProfiles()))
+    return false;
+
   // Compute the total number of samples collected in this profile.
   for (const auto &I : Reader->getProfiles())
     TotalCollectedSamples += I.second.getTotalSamples();

>From f0be4853dd6c2fa029e255164b3a0cbd3935fbcf Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Tue, 5 Mar 2024 16:06:53 -0800
Subject: [PATCH 2/7] add test

---
 .../SampleProfile/pseudo-probe-profile-mismatch-error.ll     | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-error.ll

diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-error.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-error.ll
new file mode 100644
index 00000000000000..84a2a0ff2eb6ad
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-error.ll
@@ -0,0 +1,5 @@
+; REQUIRES: x86_64-linux
+; RUN: opt < %S/pseudo-probe-profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-profile-mismatch.prof -checksum-mismatch-func-hot-block-skip=0 -checksum-mismatch-num-func-skip=1 -checksum-mismatch-error-threshold=1 -S 2>%t -o %t.ll
+; RUN: FileCheck %s --input-file %t
+
+; CHECK: error: [[*]]: The FDO profile is too old and will cause big performance regression, please drop the profile and collect a new one.

>From 02bb76cccec274886704217ac276d1fbbfc560eb Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Fri, 15 Mar 2024 13:56:28 -0700
Subject: [PATCH 3/7] addressing comments

---
 llvm/lib/Transforms/IPO/SampleProfile.cpp     | 38 +++++++++----------
 .../pseudo-probe-profile-mismatch-error.ll    |  5 +--
 2 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index e396945ad83e75..68d07c0125c396 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -240,17 +240,15 @@ static cl::opt<unsigned> ChecksumMismatchFuncHotBlockSkip(
              "whose num of hot(on average) blocks is smaller than the "
              "given number."));
 
-static cl::opt<unsigned> ChecksumMismatchNumFuncSkip(
-    "checksum-mismatch-num-func-skip", cl::Hidden, cl::init(50),
-    cl::desc("For checksum-mismatch error check, skip the check if the total "
-             "number of selected function is smaller than the given number."));
+static cl::opt<unsigned> MinfuncsForStalenessError(
+    "min-functions-for-staleness-error", cl::Hidden, cl::init(50),
+    cl::desc("Skip the check if the number of hot functions is smaller than "
+             "the given number."));
 
-static cl::opt<unsigned> ChecksumMismatchErrorThreshold(
-    "checksum-mismatch-error-threshold", cl::Hidden, cl::init(80),
-    cl::desc(
-        "For checksum-mismatch error check, error out if the percentage of "
-        "function mismatched-checksum is higher than the given percentage "
-        "threshold"));
+static cl::opt<unsigned> PrecentMismatchForStalenessError(
+    "precent-mismatch-for-staleness-error", cl::Hidden, cl::init(80),
+    cl::desc("Reject the profile if the mismatch percent is higher than the "
+             "given number"));
 
 static cl::opt<bool> CallsitePrioritizedInline(
     "sample-profile-prioritized-inline", cl::Hidden,
@@ -648,7 +646,7 @@ class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {
   std::vector<Function *> buildFunctionOrder(Module &M, LazyCallGraph &CG);
   std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(Module &M);
   void generateMDProfMetadata(Function &F);
-  bool errorIfHighChecksumMismatch(Module &M, ProfileSummaryInfo *PSI,
+  bool rejectHighStalenessProfile(Module &M, ProfileSummaryInfo *PSI,
                                    const SampleProfileMap &Profiles);
 
   /// Map from function name to Function *. Used to find the function from
@@ -2218,12 +2216,12 @@ bool SampleProfileLoader::doInitialization(Module &M,
 // function selection is based on two criteria: 1) The function is "hot" enough,
 // which is tuned by a hotness-based flag(ChecksumMismatchFuncHotBlockSkip). 2)
 // The num of function is large enough which is tuned by the
-// ChecksumMismatchNumFuncSkip flag.
-bool SampleProfileLoader::errorIfHighChecksumMismatch(
+// MinfuncsForStalenessError flag.
+bool SampleProfileLoader::rejectHighStalenessProfile(
     Module &M, ProfileSummaryInfo *PSI, const SampleProfileMap &Profiles) {
   assert(FunctionSamples::ProfileIsProbeBased &&
          "Only support for probe-based profile");
-  uint64_t TotalSelectedFunc = 0;
+  uint64_t TotalHotFunc = 0;
   uint64_t NumMismatchedFunc = 0;
   for (const auto &I : Profiles) {
     const auto &FS = I.second;
@@ -2240,22 +2238,22 @@ bool SampleProfileLoader::errorIfHighChecksumMismatch(
         ChecksumMismatchFuncHotBlockSkip * PSI->getOrCompHotCountThreshold())
       continue;
 
-    TotalSelectedFunc++;
+    TotalHotFunc++;
     if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS))
       NumMismatchedFunc++;
   }
   // Make sure that the num of selected function is not too small to distinguish
   // from the user's benign changes.
-  if (TotalSelectedFunc < ChecksumMismatchNumFuncSkip)
+  if (TotalHotFunc < MinfuncsForStalenessError)
     return false;
 
   // Finally check the mismatch percentage against the threshold.
   if (NumMismatchedFunc * 100 >=
-      TotalSelectedFunc * ChecksumMismatchErrorThreshold) {
+      TotalHotFunc * PrecentMismatchForStalenessError) {
     auto &Ctx = M.getContext();
     const char *Msg =
-        "The FDO profile is too old and will cause big performance regression, "
-        "please drop the profile and collect a new one.";
+        "The input profile significantly mismatches current source code. "
+        "Please recollect profile to avoid performance regression.";
     Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg));
     return true;
   }
@@ -2796,7 +2794,7 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
 
   // Error out if the profile checksum mismatch is too high.
   if (FunctionSamples::ProfileIsProbeBased &&
-      errorIfHighChecksumMismatch(M, PSI, Reader->getProfiles()))
+      rejectHighStalenessProfile(M, PSI, Reader->getProfiles()))
     return false;
 
   // Compute the total number of samples collected in this profile.
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-error.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-error.ll
index 84a2a0ff2eb6ad..daf616c45c46e0 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-error.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-error.ll
@@ -1,5 +1,4 @@
 ; REQUIRES: x86_64-linux
-; RUN: opt < %S/pseudo-probe-profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-profile-mismatch.prof -checksum-mismatch-func-hot-block-skip=0 -checksum-mismatch-num-func-skip=1 -checksum-mismatch-error-threshold=1 -S 2>%t -o %t.ll
-; RUN: FileCheck %s --input-file %t
+; RUN: not opt < %S/pseudo-probe-profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-profile-mismatch.prof -checksum-mismatch-func-hot-block-skip=0  -min-functions-for-staleness-error=1  -precent-mismatch-for-staleness-error=1 -S 2>&1 | FileCheck %s
 
-; CHECK: error: [[*]]: The FDO profile is too old and will cause big performance regression, please drop the profile and collect a new one.
+; CHECK: error: {{.*}}: The input profile significantly mismatches current source code. Please recollect profile to avoid performance regression.

>From d61442c6df25fb71026efdfa68e9fa9fdb7722ff Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 18 Mar 2024 22:42:29 -0700
Subject: [PATCH 4/7] Use isHotCountNthPercentile and total_samples to decide
 if function is hot

---
 llvm/lib/Transforms/IPO/SampleProfile.cpp       | 17 ++++++++---------
 .../pseudo-probe-profile-mismatch-error.ll      |  2 +-
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 68d07c0125c396..dc8780f632e0ff 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -234,11 +234,10 @@ static cl::opt<unsigned> ProfileICPRelativeHotnessSkip(
     cl::desc(
         "Skip relative hotness check for ICP up to given number of targets."));
 
-static cl::opt<unsigned> ChecksumMismatchFuncHotBlockSkip(
-    "checksum-mismatch-func-hot-block-skip", cl::Hidden, cl::init(100),
-    cl::desc("For checksum-mismatch error check, skip checking the function "
-             "whose num of hot(on average) blocks is smaller than the "
-             "given number."));
+static cl::opt<unsigned> HotFuncCutoffForStalenessError(
+    "hot-func-cutoff-for-staleness-error", cl::Hidden, cl::init(999000),
+    cl::desc("Hot function cutoff for staleness error. It's percentile value "
+             "(multiplied by 10000), e.g. 995000 for 99.5 percentile."));
 
 static cl::opt<unsigned> MinfuncsForStalenessError(
     "min-functions-for-staleness-error", cl::Hidden, cl::init(50),
@@ -647,7 +646,7 @@ class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {
   std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(Module &M);
   void generateMDProfMetadata(Function &F);
   bool rejectHighStalenessProfile(Module &M, ProfileSummaryInfo *PSI,
-                                   const SampleProfileMap &Profiles);
+                                  const SampleProfileMap &Profiles);
 
   /// Map from function name to Function *. Used to find the function from
   /// the function name. If the function name contains suffix, additional
@@ -2214,7 +2213,7 @@ bool SampleProfileLoader::doInitialization(Module &M,
 // big set of functions that are supposed to be globally performance
 // significant, only compute and check the mismatch within those functions. The
 // function selection is based on two criteria: 1) The function is "hot" enough,
-// which is tuned by a hotness-based flag(ChecksumMismatchFuncHotBlockSkip). 2)
+// which is tuned by a hotness-based flag(HotFuncCutoffForStalenessError). 2)
 // The num of function is large enough which is tuned by the
 // MinfuncsForStalenessError flag.
 bool SampleProfileLoader::rejectHighStalenessProfile(
@@ -2234,8 +2233,8 @@ bool SampleProfileLoader::rejectHighStalenessProfile(
     // checksum-mismatched and dropped, the whole binary will likely be
     // impacted, so here we use a hotness-based threshold to control the
     // selection.
-    if (FS.getTotalSamples() <
-        ChecksumMismatchFuncHotBlockSkip * PSI->getOrCompHotCountThreshold())
+    if (!PSI->isHotCountNthPercentile(HotFuncCutoffForStalenessError,
+                                      FS.getTotalSamples()))
       continue;
 
     TotalHotFunc++;
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-error.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-error.ll
index daf616c45c46e0..c6264f74b87a46 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-error.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-error.ll
@@ -1,4 +1,4 @@
 ; REQUIRES: x86_64-linux
-; RUN: not opt < %S/pseudo-probe-profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-profile-mismatch.prof -checksum-mismatch-func-hot-block-skip=0  -min-functions-for-staleness-error=1  -precent-mismatch-for-staleness-error=1 -S 2>&1 | FileCheck %s
+; RUN: not opt < %S/pseudo-probe-profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-profile-mismatch.prof  -min-functions-for-staleness-error=1  -precent-mismatch-for-staleness-error=1 -S 2>&1 | FileCheck %s
 
 ; CHECK: error: {{.*}}: The input profile significantly mismatches current source code. Please recollect profile to avoid performance regression.

>From 9a210ad72487016cdb944de3363b346e320bdee7 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Tue, 19 Mar 2024 14:59:24 -0700
Subject: [PATCH 5/7] rephrase the flag description

---
 llvm/lib/Transforms/IPO/SampleProfile.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index dc8780f632e0ff..cb3b2be6979cc1 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -236,8 +236,9 @@ static cl::opt<unsigned> ProfileICPRelativeHotnessSkip(
 
 static cl::opt<unsigned> HotFuncCutoffForStalenessError(
     "hot-func-cutoff-for-staleness-error", cl::Hidden, cl::init(999000),
-    cl::desc("Hot function cutoff for staleness error. It's percentile value "
-             "(multiplied by 10000), e.g. 995000 for 99.5 percentile."));
+    cl::desc("A function is considered hot for staleness error check if its "
+             "total sample count is above the specified percentile(multiplied "
+             "by 10000, e.g. 995000 for 99.5 percentile)."));
 
 static cl::opt<unsigned> MinfuncsForStalenessError(
     "min-functions-for-staleness-error", cl::Hidden, cl::init(50),

>From 30f09406dacb1fc46cd6f168718ab8984de70c04 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Wed, 20 Mar 2024 20:21:44 -0700
Subject: [PATCH 6/7] addressing comments and set hot-func-cutoff 800000

---
 llvm/lib/Transforms/IPO/SampleProfile.cpp                    | 5 ++---
 .../SampleProfile/pseudo-probe-profile-mismatch-error.ll     | 3 +++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index cb3b2be6979cc1..701350a3abbecb 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -235,10 +235,9 @@ static cl::opt<unsigned> ProfileICPRelativeHotnessSkip(
         "Skip relative hotness check for ICP up to given number of targets."));
 
 static cl::opt<unsigned> HotFuncCutoffForStalenessError(
-    "hot-func-cutoff-for-staleness-error", cl::Hidden, cl::init(999000),
+    "hot-func-cutoff-for-staleness-error", cl::Hidden, cl::init(800000),
     cl::desc("A function is considered hot for staleness error check if its "
-             "total sample count is above the specified percentile(multiplied "
-             "by 10000, e.g. 995000 for 99.5 percentile)."));
+             "total sample count is above the specified percentile"));
 
 static cl::opt<unsigned> MinfuncsForStalenessError(
     "min-functions-for-staleness-error", cl::Hidden, cl::init(50),
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-error.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-error.ll
index c6264f74b87a46..2bb8f677f40ca8 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-error.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-error.ll
@@ -1,4 +1,7 @@
 ; REQUIRES: x86_64-linux
 ; RUN: not opt < %S/pseudo-probe-profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-profile-mismatch.prof  -min-functions-for-staleness-error=1  -precent-mismatch-for-staleness-error=1 -S 2>&1 | FileCheck %s
+; RUN: opt < %S/pseudo-probe-profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-profile-mismatch.prof  -min-functions-for-staleness-error=3  -precent-mismatch-for-staleness-error=70 -S 2>&1
+; RUN: opt < %S/pseudo-probe-profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-profile-mismatch.prof  -min-functions-for-staleness-error=4  -precent-mismatch-for-staleness-error=1 -S 2>&1
+
 
 ; CHECK: error: {{.*}}: The input profile significantly mismatches current source code. Please recollect profile to avoid performance regression.

>From 8a09260e006bdd08545ada3f9a461202f6cc5c2f Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 25 Mar 2024 11:53:21 -0700
Subject: [PATCH 7/7] tweak comments

---
 llvm/lib/Transforms/IPO/SampleProfile.cpp | 27 +++++++++--------------
 1 file changed, 10 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 701350a3abbecb..2cbef8a7ae611d 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -242,12 +242,12 @@ static cl::opt<unsigned> HotFuncCutoffForStalenessError(
 static cl::opt<unsigned> MinfuncsForStalenessError(
     "min-functions-for-staleness-error", cl::Hidden, cl::init(50),
     cl::desc("Skip the check if the number of hot functions is smaller than "
-             "the given number."));
+             "the specified number."));
 
 static cl::opt<unsigned> PrecentMismatchForStalenessError(
     "precent-mismatch-for-staleness-error", cl::Hidden, cl::init(80),
     cl::desc("Reject the profile if the mismatch percent is higher than the "
-             "given number"));
+             "given number."));
 
 static cl::opt<bool> CallsitePrioritizedInline(
     "sample-profile-prioritized-inline", cl::Hidden,
@@ -2207,15 +2207,13 @@ bool SampleProfileLoader::doInitialization(Module &M,
 // Note that this is a module-level check. Even if one module is errored out,
 // the entire build will be errored out. However, the user could make big
 // changes to functions in single module but those changes might not be
-// performance significant to the whole binary. Therefore, we use a conservative
-// approach to make sure we only error out if it globally impacts the binary
-// performance. To achieve this, we use heuristics to select a reasonable
-// big set of functions that are supposed to be globally performance
-// significant, only compute and check the mismatch within those functions. The
-// function selection is based on two criteria: 1) The function is "hot" enough,
-// which is tuned by a hotness-based flag(HotFuncCutoffForStalenessError). 2)
-// The num of function is large enough which is tuned by the
-// MinfuncsForStalenessError flag.
+// performance significant to the whole binary. Therefore, to avoid those false
+// positives, we select a reasonable big set of hot functions that are supposed
+// to be globally performance significant, only compute and check the mismatch
+// within those functions. The function selection is based on two criteria:
+// 1) The function is hot enough, which is tuned by a hotness-based
+// flag(HotFuncCutoffForStalenessError). 2) The num of function is large enough
+// which is tuned by the MinfuncsForStalenessError flag.
 bool SampleProfileLoader::rejectHighStalenessProfile(
     Module &M, ProfileSummaryInfo *PSI, const SampleProfileMap &Profiles) {
   assert(FunctionSamples::ProfileIsProbeBased &&
@@ -2228,11 +2226,7 @@ bool SampleProfileLoader::rejectHighStalenessProfile(
     if (!FuncDesc)
       continue;
 
-    // We want to select a set of functions that are globally performance
-    // significant, in other words, if those functions profiles are
-    // checksum-mismatched and dropped, the whole binary will likely be
-    // impacted, so here we use a hotness-based threshold to control the
-    // selection.
+    // Use a hotness-based threshold to control the function selection.
     if (!PSI->isHotCountNthPercentile(HotFuncCutoffForStalenessError,
                                       FS.getTotalSamples()))
       continue;
@@ -2791,7 +2785,6 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
     PSI->refresh();
   }
 
-  // Error out if the profile checksum mismatch is too high.
   if (FunctionSamples::ProfileIsProbeBased &&
       rejectHighStalenessProfile(M, PSI, Reader->getProfiles()))
     return false;



More information about the llvm-commits mailing list