[llvm] 1662cfa - [CSSPGO][CSProfileConverter] Remove call target samples when including callee samples into caller.

Hongtao Yu via llvm-commits llvm-commits at lists.llvm.org
Fri May 13 09:19:40 PDT 2022


Author: Hongtao Yu
Date: 2022-05-13T09:19:32-07:00
New Revision: 1662cfa4be33edd94ab71f8aa83676eb8b378f4a

URL: https://github.com/llvm/llvm-project/commit/1662cfa4be33edd94ab71f8aa83676eb8b378f4a
DIFF: https://github.com/llvm/llvm-project/commit/1662cfa4be33edd94ab71f8aa83676eb8b378f4a.diff

LOG: [CSSPGO][CSProfileConverter] Remove call target samples when including callee samples into caller.

When a flat CS profile is converted to a nested profile, the call target samples for inlined callee contexts are left over in the callsite target map. This could cause indirect call promotion to function improperly. One issue is that the inlined callsites are treated with double amount of samples. The other is the inlined callsites are reconsidered for subsequent PGO ICP.

I'm fixing this by excluding call targets from the callsite for inlined targets. While fixing this I found that callsite target sum and the number of body samples for that callsite could be mismatched. {D122609} has an explanation and a fix for that on llvm-profgen side. For now I'm tolerating it in this change.

Reviewed By: wenlei

Differential Revision: https://reviews.llvm.org/D125266

Added: 
    

Modified: 
    llvm/include/llvm/ProfileData/SampleProf.h
    llvm/lib/ProfileData/SampleProf.cpp
    llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test
    llvm/test/tools/llvm-profgen/cs-preinline.test

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 224d808e5f5f7..a39a4d8c5658e 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -342,6 +342,15 @@ class SampleRecord {
                       : sampleprof_error::success;
   }
 
+  /// Decrease the number of samples for this record by \p S. Return the amout
+  /// of samples actually decreased.
+  uint64_t removeSamples(uint64_t S) {
+    if (S > NumSamples)
+      S = NumSamples;
+    NumSamples -= S;
+    return S;
+  }
+
   /// Add called function \p F with samples \p S.
   /// Optionally scale sample count \p S by \p Weight.
   ///
@@ -357,6 +366,18 @@ class SampleRecord {
                       : sampleprof_error::success;
   }
 
+  /// Remove called function from the call target map. Return the target sample
+  /// count of the called function.
+  uint64_t removeCalledTarget(StringRef F) {
+    uint64_t Count = 0;
+    auto I = CallTargets.find(F);
+    if (I != CallTargets.end()) {
+      Count = I->second;
+      CallTargets.erase(I);
+    }
+    return Count;
+  }
+
   /// Return true if this sample record contains function calls.
   bool hasCalls() const { return !CallTargets.empty(); }
 
@@ -704,6 +725,13 @@ class FunctionSamples {
                       : sampleprof_error::success;
   }
 
+  void removeTotalSamples(uint64_t Num) {
+    if (TotalSamples < Num)
+      TotalSamples = 0;
+    else
+      TotalSamples -= Num;
+  }
+
   void setTotalSamples(uint64_t Num) { TotalSamples = Num; }
 
   sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) {
@@ -728,6 +756,22 @@ class FunctionSamples {
         FName, Num, Weight);
   }
 
+  // Remove a call target and decrease the body sample correspondingly. Return
+  // the number of body samples actually decreased.
+  uint64_t removeCalledTargetAndBodySample(uint32_t LineOffset,
+                                           uint32_t Discriminator,
+                                           StringRef FName) {
+    uint64_t Count = 0;
+    auto I = BodySamples.find(LineLocation(LineOffset, Discriminator));
+    if (I != BodySamples.end()) {
+      Count = I->second.removeCalledTarget(FName);
+      Count = I->second.removeSamples(Count);
+      if (!I->second.getSamples())
+        BodySamples.erase(I);
+    }
+    return Count;
+  }
+
   sampleprof_error addBodySamplesForProbe(uint32_t Index, uint64_t Num,
                                           uint64_t Weight = 1) {
     SampleRecord S;

diff  --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index 9862605eed878..5e325433f6ea8 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -518,6 +518,12 @@ void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) {
       auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);
       SamplesMap.emplace(OrigChildContext.getName().str(), *ChildProfile);
       NodeProfile->addTotalSamples(ChildProfile->getTotalSamples());
+      // Remove the corresponding body sample for the callsite and update the
+      // total weight.
+      auto Count = NodeProfile->removeCalledTargetAndBodySample(
+          ChildNode.CallSiteLoc.LineOffset, ChildNode.CallSiteLoc.Discriminator,
+          OrigChildContext.getName());
+      NodeProfile->removeTotalSamples(Count);
     }
 
     // Separate child profile to be a standalone profile, if the current parent

diff  --git a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test
index 9c2105db71973..d458d7fad1be6 100644
--- a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test
+++ b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test
@@ -10,17 +10,16 @@ RUN: llvm-profdata merge --sample --text -output=%t3.proftext %S/Inputs/cs-sampl
 RUN: FileCheck %s < %t3.proftext --match-full-lines --strict-whitespace -check-prefix=RECOUNT
 RUN: llvm-profdata merge --sample --extbinary -output=%t2.profbin %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1
 RUN: llvm-profdata show -sample -detailed-summary %S/Inputs/cs-sample-preinline.proftext | FileCheck %s -check-prefix=SUMMARY
-RUN: llvm-profdata show -sample -detailed-summary %t2.profbin | FileCheck %s -check-prefix=SUMMARY
-RUN: llvm-profdata show -sample -detailed-summary %t3.proftext | FileCheck %s -check-prefix=SUMMARY
+RUN: llvm-profdata show -sample -detailed-summary %t2.profbin | FileCheck %s -check-prefix=SUMMARY-NEST
+RUN: llvm-profdata show -sample -detailed-summary %t3.proftext | FileCheck %s -check-prefix=SUMMARY-NEST
 
 
-; CHECK:main:1968679:12
+; CHECK:main:1968621:12
 ; CHECK-NEXT: 2: 24
-; CHECK-NEXT: 3: 28 _Z5funcAi:18
-; CHECK-NEXT: 3.1: 28 _Z5funcBi:30
-; CHECK-NEXT: 3: _Z5funcAi:1467398
+; CHECK-NEXT: 3: 17 _Z5funcAi:7
+; CHECK-NEXT: 3.1: 10 _Z5funcBi:11
+; CHECK-NEXT: 3: _Z5funcAi:1467388
 ; CHECK-NEXT:  0: 10
-; CHECK-NEXT:  1: 10 _Z8funcLeafi:11
 ; CHECK-NEXT:  3: 24
 ; CHECK-NEXT:  1: _Z8funcLeafi:1467299
 ; CHECK-NEXT:   0: 6
@@ -30,9 +29,8 @@ RUN: llvm-profdata show -sample -detailed-summary %t3.proftext | FileCheck %s -c
 ; CHECK-NEXT:   15: 23
 ; CHECK-NEXT:   !Attributes: 2
 ; CHECK-NEXT:  !Attributes: 2
-; CHECK-NEXT: 3.1: _Z5funcBi:500973
+; CHECK-NEXT: 3.1: _Z5funcBi:500954
 ; CHECK-NEXT:  0: 19
-; CHECK-NEXT:  1: 19 _Z8funcLeafi:20
 ; CHECK-NEXT:  3: 12
 ; CHECK-NEXT:  1: _Z8funcLeafi:500853
 ; CHECK-NEXT:   0: 15
@@ -49,14 +47,12 @@ RUN: llvm-profdata show -sample -detailed-summary %t3.proftext | FileCheck %s -c
 ; CHECK-NEXT: 1: 13
 
 
-
-; RECOUNT:main:1968679:12
+; RECOUNT:main:1968621:12
 ; RECOUNT-NEXT: 2: 24
-; RECOUNT-NEXT: 3: 28 _Z5funcAi:18
-; RECOUNT-NEXT: 3.1: 28 _Z5funcBi:30
-; RECOUNT-NEXT: 3: _Z5funcAi:1467398
+; RECOUNT-NEXT: 3: 17 _Z5funcAi:7
+; RECOUNT-NEXT: 3.1: 10 _Z5funcBi:11
+; RECOUNT-NEXT: 3: _Z5funcAi:1467388
 ; RECOUNT-NEXT:  0: 10
-; RECOUNT-NEXT:  1: 10 _Z8funcLeafi:11
 ; RECOUNT-NEXT:  3: 24
 ; RECOUNT-NEXT:  1: _Z8funcLeafi:1467299
 ; RECOUNT-NEXT:   0: 6
@@ -66,9 +62,8 @@ RUN: llvm-profdata show -sample -detailed-summary %t3.proftext | FileCheck %s -c
 ; RECOUNT-NEXT:   15: 23
 ; RECOUNT-NEXT:   !Attributes: 6
 ; RECOUNT-NEXT:  !Attributes: 6
-; RECOUNT-NEXT: 3.1: _Z5funcBi:500973
+; RECOUNT-NEXT: 3.1: _Z5funcBi:500954
 ; RECOUNT-NEXT:  0: 19
-; RECOUNT-NEXT:  1: 19 _Z8funcLeafi:20
 ; RECOUNT-NEXT:  3: 12
 ; RECOUNT-NEXT:  1: _Z8funcLeafi:500853
 ; RECOUNT-NEXT:   0: 15
@@ -89,9 +84,8 @@ RUN: llvm-profdata show -sample -detailed-summary %t3.proftext | FileCheck %s -c
 ; RECOUNT-NEXT: 11: 23327 _Z3fibi:25228
 ; RECOUNT-NEXT: 15: 34
 ; RECOUNT-NEXT: !Attributes: 2
-; RECOUNT-NEXT:_Z5funcAi:1467398:11
+; RECOUNT-NEXT:_Z5funcAi:1467388:11
 ; RECOUNT-NEXT: 0: 10
-; RECOUNT-NEXT: 1: 10 _Z8funcLeafi:11
 ; RECOUNT-NEXT: 3: 24
 ; RECOUNT-NEXT: 1: _Z8funcLeafi:1467299
 ; RECOUNT-NEXT:  0: 6
@@ -101,9 +95,9 @@ RUN: llvm-profdata show -sample -detailed-summary %t3.proftext | FileCheck %s -c
 ; RECOUNT-NEXT:  15: 23
 ; RECOUNT-NEXT:  !Attributes: 6
 ; RECOUNT-NEXT: !Attributes: 2
-; RECOUNT-NEXT:_Z5funcBi:501213:32
+; RECOUNT-NEXT:_Z5funcBi:501194:32
 ; RECOUNT-NEXT: 0: 32
-; RECOUNT-NEXT: 1: 32 _Z8funcLeafi:20
+; RECOUNT-NEXT: 1: 13
 ; RECOUNT-NEXT: 3: 12
 ; RECOUNT-NEXT: 1: _Z8funcLeafi:500853
 ; RECOUNT-NEXT:  0: 15
@@ -115,13 +109,12 @@ RUN: llvm-profdata show -sample -detailed-summary %t3.proftext | FileCheck %s -c
 ; RECOUNT-NEXT:  15: 11
 ; RECOUNT-NEXT:  !Attributes: 6
 
-; PROBE:main:1968679:12
+; PROBE:main:1968621:12
 ; PROBE-NEXT: 2: 24
-; PROBE-NEXT: 3: 28 _Z5funcAi:18
-; PROBE-NEXT: 3.1: 28 _Z5funcBi:30
-; PROBE-NEXT: 3: _Z5funcAi:1467398
+; PROBE-NEXT: 3: 17 _Z5funcAi:7
+; PROBE-NEXT: 3.1: 10 _Z5funcBi:11
+; PROBE-NEXT: 3: _Z5funcAi:1467388
 ; PROBE-NEXT:  0: 10
-; PROBE-NEXT:  1: 10 _Z8funcLeafi:11
 ; PROBE-NEXT:  3: 24
 ; PROBE-NEXT:  1: _Z8funcLeafi:1467299
 ; PROBE-NEXT:   0: 6
@@ -133,9 +126,8 @@ RUN: llvm-profdata show -sample -detailed-summary %t3.proftext | FileCheck %s -c
 ; PROBE-NEXT:   !Attributes: 2
 ; PROBE-NEXT:  !CFGChecksum: 844530426352218
 ; PROBE-NEXT:  !Attributes: 2
-; PROBE-NEXT: 3.1: _Z5funcBi:500973
+; PROBE-NEXT: 3.1: _Z5funcBi:500954
 ; PROBE-NEXT:  0: 19
-; PROBE-NEXT:  1: 19 _Z8funcLeafi:20
 ; PROBE-NEXT:  3: 12
 ; PROBE-NEXT:  1: _Z8funcLeafi:500853
 ; PROBE-NEXT:   0: 15
@@ -181,3 +173,27 @@ RUN: llvm-profdata show -sample -detailed-summary %t3.proftext | FileCheck %s -c
 ; SUMMARY-NEXT: 11 blocks with count >= 24 account for 99.99 percentage of the total counts.
 ; SUMMARY-NEXT: 16 blocks with count >= 10 account for 99.999 percentage of the total counts.
 ; SUMMARY-NEXT: 16 blocks with count >= 10 account for 99.9999 percentage of the total counts.
+
+
+; SUMMARY-NEST:      Total functions: 4
+; SUMMARY-NEST-NEXT: Maximum function count: 32
+; SUMMARY-NEST-NEXT: Maximum block count: 362830
+; SUMMARY-NEST-NEXT: Total number of blocks: 15
+; SUMMARY-NEST-NEXT: Total count: 772504
+; SUMMARY-NEST-NEXT: Detailed summary:
+; SUMMARY-NEST-NEXT: 1 blocks with count >= 362830 account for 1 percentage of the total counts.
+; SUMMARY-NEST-NEXT: 1 blocks with count >= 362830 account for 10 percentage of the total counts.
+; SUMMARY-NEST-NEXT: 1 blocks with count >= 362830 account for 20 percentage of the total counts.
+; SUMMARY-NEST-NEXT: 1 blocks with count >= 362830 account for 30 percentage of the total counts.
+; SUMMARY-NEST-NEXT: 1 blocks with count >= 362830 account for 40 percentage of the total counts.
+; SUMMARY-NEST-NEXT: 2 blocks with count >= 362805 account for 50 percentage of the total counts.
+; SUMMARY-NEST-NEXT: 2 blocks with count >= 362805 account for 60 percentage of the total counts.
+; SUMMARY-NEST-NEXT: 2 blocks with count >= 362805 account for 70 percentage of the total counts.
+; SUMMARY-NEST-NEXT: 2 blocks with count >= 362805 account for 80 percentage of the total counts.
+; SUMMARY-NEST-NEXT: 2 blocks with count >= 362805 account for 90 percentage of the total counts.
+; SUMMARY-NEST-NEXT: 3 blocks with count >= 23327 account for 95 percentage of the total counts.
+; SUMMARY-NEST-NEXT: 4 blocks with count >= 23324 account for 99 percentage of the total counts.
+; SUMMARY-NEST-NEXT: 4 blocks with count >= 23324 account for 99.9 percentage of the total counts.
+; SUMMARY-NEST-NEXT: 10 blocks with count >= 21 account for 99.99 percentage of the total counts.
+; SUMMARY-NEST-NEXT: 15 blocks with count >= 10 account for 99.999 percentage of the total counts.
+; SUMMARY-NEST-NEXT: 15 blocks with count >= 10 account for 99.9999 percentage of the total counts.

diff  --git a/llvm/test/tools/llvm-profgen/cs-preinline.test b/llvm/test/tools/llvm-profgen/cs-preinline.test
index 1844c6533f0e6..b48d63cf75b77 100644
--- a/llvm/test/tools/llvm-profgen/cs-preinline.test
+++ b/llvm/test/tools/llvm-profgen/cs-preinline.test
@@ -70,10 +70,9 @@
 ; CHECK-TRIM-NEXT: 1: 14
 ; CHECK-TRIM-NEXT: !Attributes: 3
 
-; CHECK-PREINL-NEST:     foo:393:0
+; CHECK-PREINL-NEST:     foo:379:0
 ; CHECK-PREINL-NEST-NEXT: 2.1: 14
 ; CHECK-PREINL-NEST-NEXT: 3: 15
-; CHECK-PREINL-NEST-NEXT: 3.1: 14 bar:14
 ; CHECK-PREINL-NEST-NEXT: 3.2: 1
 ; CHECK-PREINL-NEST-NEXT: 65526: 14
 ; CHECK-PREINL-NEST-NEXT: 3.1: bar:84


        


More information about the llvm-commits mailing list