[llvm] 97e2aea - [AutoFDO] Use getHeadSamplesEstimate instead of getTotalSamples to compute profile callsite staleness

via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 15 11:21:56 PST 2022


Author: wlei
Date: 2022-12-15T11:21:18-08:00
New Revision: 97e2aeab71c3acfad9dc9df000c88c78686d8092

URL: https://github.com/llvm/llvm-project/commit/97e2aeab71c3acfad9dc9df000c88c78686d8092
DIFF: https://github.com/llvm/llvm-project/commit/97e2aeab71c3acfad9dc9df000c88c78686d8092.diff

LOG: [AutoFDO] Use getHeadSamplesEstimate instead of getTotalSamples to compute profile callsite staleness

Fix two issues for profile staleness report.

1) It is more accurate to use the sum of all entry counts (`getHeadSamplesEstimate`) for the callsite samples rather than the total samples: even when a top-level callsite is mismatched, which does affect inlining, its profile can still be merged into the base profile and used later.

2) I accidentally forgot to persist the number of mismatched callsites into the binary.

Also added an asm test to check the decoding of the section.

Reviewed By: hoy, wenlei

Differential Revision: https://reviews.llvm.org/D140063

Added: 
    

Modified: 
    llvm/lib/Transforms/IPO/SampleProfile.cpp
    llvm/test/Transforms/SampleProfile/profile-mismatch.ll
    llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index df5f91bc4499b..2c18b26c5a7d8 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -430,8 +430,8 @@ class SampleProfileMatcher {
   const PseudoProbeManager *ProbeManager;
 
   // Profile mismatching statstics.
-  uint64_t TotalProfiledCallsite = 0;
-  uint64_t NumMismatchedCallsite = 0;
+  uint64_t TotalProfiledCallsites = 0;
+  uint64_t NumMismatchedCallsites = 0;
   uint64_t MismatchedCallsiteSamples = 0;
   uint64_t TotalCallsiteSamples = 0;
   uint64_t TotalProfiledFunc = 0;
@@ -2119,10 +2119,10 @@ void SampleProfileMatcher::detectProfileMismatch(const Function &F,
     uint64_t Count = I.second.getSamples();
     if (!I.second.getCallTargets().empty()) {
       TotalCallsiteSamples += Count;
-      TotalProfiledCallsite++;
+      TotalProfiledCallsites++;
       if (!MatchedCallsiteLocs.count(Loc)) {
         MismatchedCallsiteSamples += Count;
-        NumMismatchedCallsite++;
+        NumMismatchedCallsites++;
       }
     }
   }
@@ -2134,13 +2134,13 @@ void SampleProfileMatcher::detectProfileMismatch(const Function &F,
 
     uint64_t Count = 0;
     for (auto &FM : I.second) {
-      Count += FM.second.getTotalSamples();
+      Count += FM.second.getHeadSamplesEstimate();
     }
     TotalCallsiteSamples += Count;
-    TotalProfiledCallsite++;
+    TotalProfiledCallsites++;
     if (!MatchedCallsiteLocs.count(Loc)) {
       MismatchedCallsiteSamples += Count;
-      NumMismatchedCallsite++;
+      NumMismatchedCallsites++;
     }
   }
 }
@@ -2163,7 +2163,7 @@ void SampleProfileMatcher::detectProfileMismatch() {
              << ")"
              << " of samples are discarded due to function hash mismatch.\n";
     }
-    errs() << "(" << NumMismatchedCallsite << "/" << TotalProfiledCallsite
+    errs() << "(" << NumMismatchedCallsites << "/" << TotalProfiledCallsites
            << ")"
            << " of callsites' profile are invalid and "
            << "(" << MismatchedCallsiteSamples << "/" << TotalCallsiteSamples
@@ -2183,6 +2183,9 @@ void SampleProfileMatcher::detectProfileMismatch() {
                                 MismatchedFuncHashSamples);
       ProfStatsVec.emplace_back("TotalFuncHashSamples", TotalFuncHashSamples);
     }
+
+    ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
+    ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
     ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
                               MismatchedCallsiteSamples);
     ProfStatsVec.emplace_back("TotalCallsiteSamples", TotalCallsiteSamples);

diff  --git a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll
index 8d6d13daae7cf..cf07974da27fe 100644
--- a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll
+++ b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll
@@ -4,13 +4,32 @@
 ; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD
 ; RUN: llc < %t.ll -filetype=obj -o %t.obj
 ; RUN: llvm-objdump --section-headers %t.obj | FileCheck %s --check-prefix=CHECK-OBJ
+; RUN: llc < %t.ll -filetype=asm -o - | FileCheck %s --check-prefix=CHECK-ASM
 
-; CHECK: (2/3) of callsites' profile are invalid and (20/30) of samples are discarded due to callsite location mismatch.
+; CHECK: (2/3) of callsites' profile are invalid and (15/25) of samples are discarded due to callsite location mismatch.
 
-; CHECK-MD: ![[#]] = !{!"MismatchedCallsiteSamples", i64 20, !"TotalCallsiteSamples", i64 30}
+; CHECK-MD: ![[#]] = !{!"NumMismatchedCallsites", i64 2, !"TotalProfiledCallsites", i64 3, !"MismatchedCallsiteSamples", i64 15, !"TotalCallsiteSamples", i64 25}
 
 ; CHECK-OBJ: .llvm_stats
 
+; CHECK-ASM: .section  .llvm_stats,"",@progbits
+; CHECK-ASM: .byte 22
+; CHECK-ASM: .ascii  "NumMismatchedCallsites"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii  "Mg=="
+; CHECK-ASM: .byte 22
+; CHECK-ASM: .ascii  "TotalProfiledCallsites"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii  "Mw=="
+; CHECK-ASM: .byte 25
+; CHECK-ASM: .ascii  "MismatchedCallsiteSamples"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii  "MTU="
+; CHECK-ASM: .byte 20
+; CHECK-ASM: .ascii  "TotalCallsiteSamples"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii  "MjU="
+
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 

diff  --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll
index ad88fa0b4d50f..ee492b805b13b 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll
@@ -4,14 +4,49 @@
 ; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD
 ; RUN: llc < %t.ll -filetype=obj -o %t.obj
 ; RUN: llvm-objdump --section-headers %t.obj | FileCheck %s --check-prefix=CHECK-OBJ
+; RUN: llc < %t.ll -filetype=asm -o - | FileCheck %s --check-prefix=CHECK-ASM
 
 ; CHECK: (1/3) of functions' profile are invalid and (10/50) of samples are discarded due to function hash mismatch.
 ; CHECK: (2/3) of callsites' profile are invalid and (20/30) of samples are discarded due to callsite location mismatch.
 
-; CHECK-MD: ![[#]] = !{!"NumMismatchedFuncHash", i64 1, !"TotalProfiledFunc", i64 3, !"MismatchedFuncHashSamples", i64 10, !"TotalFuncHashSamples", i64 50, !"MismatchedCallsiteSamples", i64 20, !"TotalCallsiteSamples", i64 30}
+; CHECK-MD: ![[#]] = !{!"NumMismatchedFuncHash", i64 1, !"TotalProfiledFunc", i64 3, !"MismatchedFuncHashSamples", i64 10, !"TotalFuncHashSamples", i64 50, !"NumMismatchedCallsites", i64 2, !"TotalProfiledCallsites", i64 3, !"MismatchedCallsiteSamples", i64 20, !"TotalCallsiteSamples", i64 30}
 
 ; CHECK-OBJ: .llvm_stats
 
+; CHECK-ASM: .section  .llvm_stats,"",@progbits
+; CHECK-ASM: .byte 21
+; CHECK-ASM: .ascii  "NumMismatchedFuncHash"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii  "MQ=="
+; CHECK-ASM: .byte 17
+; CHECK-ASM: .ascii  "TotalProfiledFunc"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii  "Mw=="
+; CHECK-ASM: .byte 25
+; CHECK-ASM: .ascii  "MismatchedFuncHashSamples"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii  "MTA="
+; CHECK-ASM: .byte 20
+; CHECK-ASM: .ascii  "TotalFuncHashSamples"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii  "NTA="
+; CHECK-ASM: .byte 22
+; CHECK-ASM: .ascii  "NumMismatchedCallsites"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii  "Mg=="
+; CHECK-ASM: .byte 22
+; CHECK-ASM: .ascii  "TotalProfiledCallsites"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii  "Mw=="
+; CHECK-ASM: .byte 25
+; CHECK-ASM: .ascii  "MismatchedCallsiteSamples"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii  "MjA="
+; CHECK-ASM: .byte 20
+; CHECK-ASM: .ascii  "TotalCallsiteSamples"
+; CHECK-ASM: .byte 4
+; CHECK-ASM: .ascii  "MzA="
+
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 


        


More information about the llvm-commits mailing list