[llvm] e8c245d - [llvm-profgen] Skip duplication factor outside of body sample computation

Wenlei He via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 19 23:18:36 PDT 2021


Author: Wenlei He
Date: 2021-10-19T23:10:45-07:00
New Revision: e8c245dcd3b9f4237cdfb9dbcc6669d0b81e7936

URL: https://github.com/llvm/llvm-project/commit/e8c245dcd3b9f4237cdfb9dbcc6669d0b81e7936
DIFF: https://github.com/llvm/llvm-project/commit/e8c245dcd3b9f4237cdfb9dbcc6669d0b81e7936.diff

LOG: [llvm-profgen] Skip duplication factor outside of body sample computation

We incorrectly use the duplication factor for total samples even though we already accumulate samples instead of taking MAX. This causes the profile to have bloated total samples for functions with unrolled or vectorized loops. This change fixes the issue for total samples, head samples and call target samples.

Differential Revision: https://reviews.llvm.org/D112042

Added: 
    

Modified: 
    llvm/test/tools/llvm-profgen/inline-noprobe2.test
    llvm/tools/llvm-profgen/ProfileGenerator.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/test/tools/llvm-profgen/inline-noprobe2.test b/llvm/test/tools/llvm-profgen/inline-noprobe2.test
index 6a523ddef3085..147c509d3f3d0 100644
--- a/llvm/test/tools/llvm-profgen/inline-noprobe2.test
+++ b/llvm/test/tools/llvm-profgen/inline-noprobe2.test
@@ -46,7 +46,7 @@
 ;CHECK-NEXT:   1: 6
 ;CHECK-NEXT:   2: 6
 ;CHECK-NEXT:   3: 6
-;CHECK-NEXT: partition_pivot_last:647:7
+;CHECK-NEXT: partition_pivot_last:389:7
 ;CHECK-NEXT:  1: 6
 ;CHECK-NEXT:  2: 6
 ;CHECK-NEXT:  3: 6
@@ -62,7 +62,7 @@
 
 ;CHECK-NEXT:  6: 5
 ;CHECK-NEXT:  7: 5
-;CHECK-NEXT:  5: swap:116
+;CHECK-NEXT:  5: swap:61
 
 ;w/o duplication factor :  1: 9
 ;w/o duplication factor :  2: 9

diff  --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 5d1cde308e8d6..e98554aa67136 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -274,6 +274,12 @@ void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
     uint64_t Count) {
   // Use the maximum count of samples with same line location
   uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
+
+  // Use duplication factor to compensate for loop unroll/vectorization.
+  // Note that this is only needed when we're taking MAX of the counts at
+  // the location instead of SUM.
+  Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
+
   ErrorOr<uint64_t> R =
       FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
 
@@ -384,12 +390,10 @@ void ProfileGenerator::populateBodySamplesForAllFunctions(
       const SampleContextFrameVector &FrameVec =
           Binary->getFrameLocationStack(Offset);
       if (!FrameVec.empty()) {
-        uint64_t DC = Count * getDuplicationFactor(
-                                  FrameVec.back().Location.Discriminator);
         FunctionSamples &FunctionProfile =
-            getLeafProfileAndAddTotalSamples(FrameVec, DC);
+            getLeafProfileAndAddTotalSamples(FrameVec, Count);
         updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
-                                            DC);
+                                            Count);
       }
       // Move to next IP within the range.
       IP.advance();
@@ -430,7 +434,6 @@ void ProfileGenerator::populateBoundarySamplesForAllFunctions(
     const SampleContextFrameVector &FrameVec =
         Binary->getFrameLocationStack(SourceOffset);
     if (!FrameVec.empty()) {
-      Count *= getDuplicationFactor(FrameVec.back().Location.Discriminator);
       FunctionSamples &FunctionProfile =
           getLeafProfileAndAddTotalSamples(FrameVec, Count);
       FunctionProfile.addCalledTargetSamples(
@@ -545,10 +548,8 @@ void CSProfileGenerator::populateBodySamplesForFunction(
       auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
       if (LeafLoc.hasValue()) {
         // Recording body sample for this specific context
-        uint64_t DC =
-            Count * getDuplicationFactor(LeafLoc->Location.Discriminator);
-        updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, DC);
-        FunctionProfile.addTotalSamples(DC);
+        updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
+        FunctionProfile.addTotalSamples(Count);
       }
 
       // Move to next IP within the range
@@ -575,7 +576,6 @@ void CSProfileGenerator::populateBoundarySamplesForFunction(
     auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
     if (!LeafLoc.hasValue())
       continue;
-    Count *= getDuplicationFactor(LeafLoc->Location.Discriminator);
     FunctionProfile.addCalledTargetSamples(
         LeafLoc->Location.LineOffset,
         getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName,


        


More information about the llvm-commits mailing list