[llvm] e8c245d - [llvm-profgen] Skip duplication factor outside of body sample computation
Wenlei He via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 19 23:18:36 PDT 2021
Author: Wenlei He
Date: 2021-10-19T23:10:45-07:00
New Revision: e8c245dcd3b9f4237cdfb9dbcc6669d0b81e7936
URL: https://github.com/llvm/llvm-project/commit/e8c245dcd3b9f4237cdfb9dbcc6669d0b81e7936
DIFF: https://github.com/llvm/llvm-project/commit/e8c245dcd3b9f4237cdfb9dbcc6669d0b81e7936.diff
LOG: [llvm-profgen] Skip duplication factor outside of body sample computation
We incorrectly applied the duplication factor to total samples even though total samples are already accumulated (SUM) rather than taken as a MAX. This caused the profile to have bloated total samples for functions with unrolled or vectorized loops. This change fixes the issue for total samples, head samples, and call target samples.
Differential Revision: https://reviews.llvm.org/D112042
Added:
Modified:
llvm/test/tools/llvm-profgen/inline-noprobe2.test
llvm/tools/llvm-profgen/ProfileGenerator.cpp
Removed:
################################################################################
diff --git a/llvm/test/tools/llvm-profgen/inline-noprobe2.test b/llvm/test/tools/llvm-profgen/inline-noprobe2.test
index 6a523ddef3085..147c509d3f3d0 100644
--- a/llvm/test/tools/llvm-profgen/inline-noprobe2.test
+++ b/llvm/test/tools/llvm-profgen/inline-noprobe2.test
@@ -46,7 +46,7 @@
;CHECK-NEXT: 1: 6
;CHECK-NEXT: 2: 6
;CHECK-NEXT: 3: 6
-;CHECK-NEXT: partition_pivot_last:647:7
+;CHECK-NEXT: partition_pivot_last:389:7
;CHECK-NEXT: 1: 6
;CHECK-NEXT: 2: 6
;CHECK-NEXT: 3: 6
@@ -62,7 +62,7 @@
;CHECK-NEXT: 6: 5
;CHECK-NEXT: 7: 5
-;CHECK-NEXT: 5: swap:116
+;CHECK-NEXT: 5: swap:61
;w/o duplication factor : 1: 9
;w/o duplication factor : 2: 9
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 5d1cde308e8d6..e98554aa67136 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -274,6 +274,12 @@ void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
uint64_t Count) {
// Use the maximum count of samples with same line location
uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
+
+ // Use duplication factor to compensate for loop unroll/vectorization.
+ // Note that this is only needed when we're taking MAX of the counts at
+ // the location instead of SUM.
+ Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
+
ErrorOr<uint64_t> R =
FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
@@ -384,12 +390,10 @@ void ProfileGenerator::populateBodySamplesForAllFunctions(
const SampleContextFrameVector &FrameVec =
Binary->getFrameLocationStack(Offset);
if (!FrameVec.empty()) {
- uint64_t DC = Count * getDuplicationFactor(
- FrameVec.back().Location.Discriminator);
FunctionSamples &FunctionProfile =
- getLeafProfileAndAddTotalSamples(FrameVec, DC);
+ getLeafProfileAndAddTotalSamples(FrameVec, Count);
updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
- DC);
+ Count);
}
// Move to next IP within the range.
IP.advance();
@@ -430,7 +434,6 @@ void ProfileGenerator::populateBoundarySamplesForAllFunctions(
const SampleContextFrameVector &FrameVec =
Binary->getFrameLocationStack(SourceOffset);
if (!FrameVec.empty()) {
- Count *= getDuplicationFactor(FrameVec.back().Location.Discriminator);
FunctionSamples &FunctionProfile =
getLeafProfileAndAddTotalSamples(FrameVec, Count);
FunctionProfile.addCalledTargetSamples(
@@ -545,10 +548,8 @@ void CSProfileGenerator::populateBodySamplesForFunction(
auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
if (LeafLoc.hasValue()) {
// Recording body sample for this specific context
- uint64_t DC =
- Count * getDuplicationFactor(LeafLoc->Location.Discriminator);
- updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, DC);
- FunctionProfile.addTotalSamples(DC);
+ updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
+ FunctionProfile.addTotalSamples(Count);
}
// Move to next IP within the range
@@ -575,7 +576,6 @@ void CSProfileGenerator::populateBoundarySamplesForFunction(
auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
if (!LeafLoc.hasValue())
continue;
- Count *= getDuplicationFactor(LeafLoc->Location.Discriminator);
FunctionProfile.addCalledTargetSamples(
LeafLoc->Location.LineOffset,
getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName,
More information about the llvm-commits
mailing list