[llvm] f0f70ae - [CSSPGO] Do not recount callee samples when computing profile summary for nested CS profile.

Hongtao Yu via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 11 09:06:12 PST 2022


Author: Hongtao Yu
Date: 2022-02-11T09:05:51-08:00
New Revision: f0f70ae674f5b213f3508b774df6f27384fd2046

URL: https://github.com/llvm/llvm-project/commit/f0f70ae674f5b213f3508b774df6f27384fd2046
DIFF: https://github.com/llvm/llvm-project/commit/f0f70ae674f5b213f3508b774df6f27384fd2046.diff

LOG: [CSSPGO] Do not recount callee samples when computing profile summary for nested CS profile.

When generating a nested CS profile with all calling contexts of a function duplicated into a base profile under `--generate-merged-base-profiles`, do not recount callee samples when computing the profile summary. This fixes the profile summary mismatch between the flat CS profile and the nested CS profile, for both the extbinary and text formats.

Reviewed By: wenlei

Differential Revision: https://reviews.llvm.org/D119494

Added: 
    

Modified: 
    llvm/include/llvm/ProfileData/SampleProf.h
    llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
    llvm/lib/ProfileData/SampleProf.cpp
    llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test
    llvm/test/tools/llvm-profgen/cs-preinline.test

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index bad2139fe8f0e..2e255b90d1b5f 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -413,6 +413,8 @@ enum ContextAttributeMask {
   ContextNone = 0x0,
   ContextWasInlined = 0x1,      // Leaf of context was inlined in previous build
   ContextShouldBeInlined = 0x2, // Leaf of context should be inlined
+  ContextDuplicatedIntoBase =
+      0x4, // Leaf of context is duplicated into the base profile
 };
 
 // Represents a context frame with function name and line location

diff  --git a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
index bbb640cfaee8c..8ec26b0a0e650 100644
--- a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
+++ b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
@@ -110,7 +110,13 @@ void SampleProfileSummaryBuilder::addRecord(
     NumFunctions++;
     if (FS.getHeadSamples() > MaxFunctionCount)
       MaxFunctionCount = FS.getHeadSamples();
+  } else if (FS.getContext().hasAttribute(
+                 sampleprof::ContextDuplicatedIntoBase)) {
+    // Do not recount callee samples if they are already merged into their base
+    // profiles. This can happen to CS nested profile.
+    return;
   }
+
   for (const auto &I : FS.getBodySamples()) {
     uint64_t Count = I.second.getSamples();
       addCount(Count);

diff  --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index 9b01a386a360a..5e11df6b6aad4 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -531,8 +531,14 @@ void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) {
     // thus done optionally. It is seen that duplicating context profiles into
     // base profiles improves the code quality for thinlto build by allowing a
     // profile in the prelink phase for to-be-fully-inlined functions.
-    if (!NodeProfile || GenerateMergedBaseProfiles)
+    if (!NodeProfile) {
       ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
+    } else if (GenerateMergedBaseProfiles) {
+      ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
+      auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);
+      SamplesMap[ChildProfile->getName().str()].getContext().setAttribute(
+          ContextDuplicatedIntoBase);
+    }
 
     // Contexts coming with a `ContextShouldBeInlined` attribute indicate this
     // is a preinliner-computed profile.

diff  --git a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test
index fe28f57676b03..03d15671f1fd9 100644
--- a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test
+++ b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test
@@ -6,8 +6,12 @@ RUN: llvm-profdata merge --sample --extbinary -output=%t.profbin %S/Inputs/cs-sa
 RUN: llvm-profdata merge --sample --text -output=%t2.proftext %t.profbin
 RUN: FileCheck %s < %t2.proftext --match-full-lines --strict-whitespace
 RUN: llvm-profdata show --sample -show-sec-info-only %t.profbin | FileCheck %s -check-prefix=PREINLINE
-RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1
-RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace -check-prefix=RECOUNT
+RUN: llvm-profdata merge --sample --text -output=%t3.proftext %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1
+RUN: FileCheck %s < %t3.proftext --match-full-lines --strict-whitespace -check-prefix=RECOUNT
+RUN: llvm-profdata merge --sample --extbinary -output=%t2.profbin %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1
+RUN: llvm-profdata show -sample -detailed-summary %S/Inputs/cs-sample-preinline.proftext | FileCheck %s -check-prefix=SUMMARY
+RUN: llvm-profdata show -sample -detailed-summary %t2.profbin | FileCheck %s -check-prefix=SUMMARY
+RUN: llvm-profdata show -sample -detailed-summary %t3.proftext | FileCheck %s -check-prefix=SUMMARY
 
 
 ; CHECK:main:1968679:12
@@ -60,8 +64,8 @@ RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace -check-pr
 ; RECOUNT-NEXT:   3: 287884
 ; RECOUNT-NEXT:   4: 287864 _Z3fibi:315608
 ; RECOUNT-NEXT:   15: 23
-; RECOUNT-NEXT:   !Attributes: 2
-; RECOUNT-NEXT:  !Attributes: 2
+; RECOUNT-NEXT:   !Attributes: 6
+; RECOUNT-NEXT:  !Attributes: 6
 ; RECOUNT-NEXT: 3.1: _Z5funcBi:500973
 ; RECOUNT-NEXT:  0: 19
 ; RECOUNT-NEXT:  1: 19 _Z8funcLeafi:20
@@ -74,8 +78,8 @@ RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace -check-pr
 ; RECOUNT-NEXT:   10: 23324
 ; RECOUNT-NEXT:   11: 23327 _Z3fibi:25228
 ; RECOUNT-NEXT:   15: 11
-; RECOUNT-NEXT:   !Attributes: 2
-; RECOUNT-NEXT:  !Attributes: 2
+; RECOUNT-NEXT:   !Attributes: 6
+; RECOUNT-NEXT:  !Attributes: 6
 ; RECOUNT-NEXT:_Z8funcLeafi:1968152:31
 ; RECOUNT-NEXT: 0: 21
 ; RECOUNT-NEXT: 1: 21
@@ -95,7 +99,7 @@ RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace -check-pr
 ; RECOUNT-NEXT:  3: 287884
 ; RECOUNT-NEXT:  4: 287864 _Z3fibi:315608
 ; RECOUNT-NEXT:  15: 23
-; RECOUNT-NEXT:  !Attributes: 2
+; RECOUNT-NEXT:  !Attributes: 6
 ; RECOUNT-NEXT: !Attributes: 2
 ; RECOUNT-NEXT:_Z5funcBi:501213:32
 ; RECOUNT-NEXT: 0: 32
@@ -109,7 +113,7 @@ RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace -check-pr
 ; RECOUNT-NEXT:  10: 23324
 ; RECOUNT-NEXT:  11: 23327 _Z3fibi:25228
 ; RECOUNT-NEXT:  15: 11
-; RECOUNT-NEXT:  !Attributes: 2
+; RECOUNT-NEXT:  !Attributes: 6
 
 ; PROBE:main:1968679:12
 ; PROBE-NEXT: 2: 24
@@ -153,3 +157,27 @@ RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace -check-pr
 
 
 ; PREINLINE: ProfileSummarySection {{.*}} Flags: {context-nested}
+
+
+; SUMMARY:      Total functions: 4
+; SUMMARY-NEXT: Maximum function count: 32
+; SUMMARY-NEXT: Maximum block count: 362830
+; SUMMARY-NEXT: Total number of blocks: 16
+; SUMMARY-NEXT: Total count: 772562
+; SUMMARY-NEXT: Detailed summary:
+; SUMMARY-NEXT: 1 blocks with count >= 362830 account for 1 percentage of the total counts.
+; SUMMARY-NEXT: 1 blocks with count >= 362830 account for 10 percentage of the total counts.
+; SUMMARY-NEXT: 1 blocks with count >= 362830 account for 20 percentage of the total counts.
+; SUMMARY-NEXT: 1 blocks with count >= 362830 account for 30 percentage of the total counts.
+; SUMMARY-NEXT: 1 blocks with count >= 362830 account for 40 percentage of the total counts.
+; SUMMARY-NEXT: 2 blocks with count >= 362805 account for 50 percentage of the total counts.
+; SUMMARY-NEXT: 2 blocks with count >= 362805 account for 60 percentage of the total counts.
+; SUMMARY-NEXT: 2 blocks with count >= 362805 account for 70 percentage of the total counts.
+; SUMMARY-NEXT: 2 blocks with count >= 362805 account for 80 percentage of the total counts.
+; SUMMARY-NEXT: 2 blocks with count >= 362805 account for 90 percentage of the total counts.
+; SUMMARY-NEXT: 3 blocks with count >= 23327 account for 95 percentage of the total counts.
+; SUMMARY-NEXT: 4 blocks with count >= 23324 account for 99 percentage of the total counts.
+; SUMMARY-NEXT: 4 blocks with count >= 23324 account for 99.9 percentage of the total counts.
+; SUMMARY-NEXT: 11 blocks with count >= 24 account for 99.99 percentage of the total counts.
+; SUMMARY-NEXT: 16 blocks with count >= 10 account for 99.999 percentage of the total counts.
+; SUMMARY-NEXT: 16 blocks with count >= 10 account for 99.9999 percentage of the total counts.

diff  --git a/llvm/test/tools/llvm-profgen/cs-preinline.test b/llvm/test/tools/llvm-profgen/cs-preinline.test
index 2ade9cb0ce431..8afa811674fd7 100644
--- a/llvm/test/tools/llvm-profgen/cs-preinline.test
+++ b/llvm/test/tools/llvm-profgen/cs-preinline.test
@@ -65,4 +65,4 @@
 ; CHECK-PREINL-NEST-NEXT: 65526: 14
 ; CHECK-PREINL-NEST-NEXT: 3.1: bar:84
 ; CHECK-PREINL-NEST-NEXT:  1: 14
-; CHECK-PREINL-NEST-NEXT:  !Attributes: 3
+; CHECK-PREINL-NEST-NEXT:  !Attributes: 7


        


More information about the llvm-commits mailing list