[llvm] 345fd0c - [FS-AFDO] Generate pseudo-probe-based profiles with FS-discriminators.
Hongtao Yu via llvm-commits
llvm-commits at lists.llvm.org
Wed May 10 11:34:08 PDT 2023
Author: Hongtao Yu
Date: 2023-05-10T11:28:54-07:00
New Revision: 345fd0c10e90e85243265854ca8f6d9efa14f293
URL: https://github.com/llvm/llvm-project/commit/345fd0c10e90e85243265854ca8f6d9efa14f293
DIFF: https://github.com/llvm/llvm-project/commit/345fd0c10e90e85243265854ca8f6d9efa14f293.diff
LOG: [FS-AFDO] Generate pseudo-probe-based profiles with FS-discriminators.
This change enables generating pseudo-probe-based FS-AFDO profiles. The change is straightforward: building on the previous change {D147651}, it simply injects FS-discriminators at the various profile generation spots.
Reviewed By: wenlei
Differential Revision: https://reviews.llvm.org/D147957
Added:
llvm/test/tools/llvm-profgen/Inputs/fs-discriminator-probe.raw.prof
llvm/test/tools/llvm-profgen/fs-discriminator-probe.test
Modified:
llvm/include/llvm/ProfileData/SampleProf.h
llvm/tools/llvm-profgen/PerfReader.cpp
llvm/tools/llvm-profgen/ProfileGenerator.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index b8456eec1e9fd..f92b993bded59 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -800,13 +800,6 @@ class FunctionSamples {
return Count;
}
- sampleprof_error addBodySamplesForProbe(uint32_t Index, uint64_t Num,
- uint64_t Weight = 1) {
- SampleRecord S;
- S.addSamples(Num, Weight);
- return BodySamples[LineLocation(Index, 0)].merge(S, Weight);
- }
-
// Accumulate all call target samples to update the body samples.
void updateCallsiteSamples() {
for (auto &I : BodySamples) {
diff --git a/llvm/test/tools/llvm-profgen/Inputs/fs-discriminator-probe.raw.prof b/llvm/test/tools/llvm-profgen/Inputs/fs-discriminator-probe.raw.prof
new file mode 100644
index 0000000000000..84550afdd39f3
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/Inputs/fs-discriminator-probe.raw.prof
@@ -0,0 +1,72 @@
+43
+650-66c:445
+650-675:409
+650-68b:175
+650-6a2:66
+685-68b:199
+685-6a2:205
+688-68b:453
+68d-6a2:782
+6b0-6b7:775
+6b0-6c0:2778
+6b0-6dc:856
+6b0-6f1:1550
+6b9-6c0:463
+6b9-6dc:122
+6b9-6f1:211
+6d4-6dc:2259
+6d4-6f1:1019
+700-71c:508
+720-734:194
+720-73c:751
+720-741:382
+720-751:1226
+722-734:306
+722-73c:2808
+722-751:790
+736-73c:315
+736-741:196
+758-76e:503
+770-77a:849
+770-798:678
+790-798:910
+79a-7ab:1478
+7b0-7b9:885
+7b0-7c3:682
+7bb-7c3:873
+800-81f:9
+824-852:9
+860-860:2542
+865-894:8
+865-8b0:2549
+865-8b5:302
+883-8b0:246
+89a-8b5:8
+27
+ffffffffffc00001->865:2868
+66c->688:458
+675->685:423
+68b->6b9:848
+6a2->79a:1086
+6b7->68d:793
+6c0->6d4:3343
+6dc->6b0:3340
+6f1->6b0:2873
+71c->736:520
+734->758:516
+73c->722:4012
+741->720:600
+751->720:2079
+76e->79a:524
+77a->7bb:879
+798->650:1117
+798->700:516
+7ab->770:1526
+7b9->790:916
+7c3->7b0:1597
+7c3->824:9
+81f->770:10
+852->89a:9
+894->800:9
+8b0->860:2831
+8b5->883:321
diff --git a/llvm/test/tools/llvm-profgen/fs-discriminator-probe.test b/llvm/test/tools/llvm-profgen/fs-discriminator-probe.test
new file mode 100644
index 0000000000000..ba169b37d0df4
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/fs-discriminator-probe.test
@@ -0,0 +1,139 @@
+; RUN: llvm-profgen --unsymbolized-profile=%S/Inputs/fs-discriminator-probe.raw.prof --binary=%S/Inputs/fs-discriminator-probe.perfbin --output=%t1
+; RUN: llvm-profdata show --sample --show-sec-info-only %t1 | FileCheck %s --check-prefix=CHECK-SECTION
+; RUN: llvm-profdata merge --sample %t1 -o %t2 --text
+; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK
+
+; CHECK-SECTION: ProfileSummarySection - Offset: [[#]], Size: [[#]], Flags: {fs-discriminator}
+
+; CHECK: partition_pivot_first:29661:2739
+; CHECK-NEXT: 1: 2739
+; CHECK-NEXT: 2.1: 2739
+; CHECK-NEXT: 2.11265: 0
+; CHECK-NEXT: 3: 6457
+; CHECK-NEXT: 4: 508
+; CHECK-NEXT: 5.1: 508
+; CHECK-NEXT: 6.2: 508
+; CHECK-NEXT: 7.3: 2780
+; CHECK-NEXT: 8.4: 0
+; CHECK-NEXT: 9.2: 0
+; CHECK-NEXT: 10: 6457
+; CHECK-NEXT: 11: swap:508
+; CHECK-NEXT: 1.7680: 508
+; CHECK-NEXT: !CFGChecksum: 4294967295
+; CHECK-NEXT: 12: swap:6457
+; CHECK-NEXT: 1.7168: 6457
+; CHECK-NEXT: !CFGChecksum: 4294967295
+; CHECK-NEXT: !CFGChecksum: 563159988274199
+; CHECK-NEXT: main:16724:1478
+; CHECK-NEXT: 1: 1478
+; CHECK-NEXT: 2.1: 1478
+; CHECK-NEXT: 2.3585: 0
+; CHECK-NEXT: 3: 3105
+; CHECK-NEXT: 5.1: 9
+; CHECK-NEXT: 5.1537: 0
+; CHECK-NEXT: 6: 0
+; CHECK-NEXT: 7: 2859
+; CHECK-NEXT: 8.1: 9
+; CHECK-NEXT: 10.3: 0
+; CHECK-NEXT: 11.2: 0
+; CHECK-NEXT: 12: 0
+; CHECK-NEXT: 13.2: 0
+; CHECK-NEXT: 14: 3105
+; CHECK-NEXT: 15: 1567
+; CHECK-NEXT: 16: 9
+; CHECK-NEXT: 17: 0
+; CHECK-NEXT: 18: 3105
+; CHECK-NEXT: !CFGChecksum: 1126178599120658
+; CHECK-NEXT: partition_pivot_last:10497:0
+; CHECK-NEXT: 1: 0
+; CHECK-NEXT: 2.1: 0
+; CHECK-NEXT: 2.6145: 1095
+; CHECK-NEXT: 2.7681: 241
+; CHECK-NEXT: 2.8193: 0
+; CHECK-NEXT: 3: 1095
+; CHECK-NEXT: 4: 0
+; CHECK-NEXT: 4.3072: 1098
+; CHECK-NEXT: 4.12800: 0
+; CHECK-NEXT: 5: 0
+; CHECK-NEXT: 5.5632: 0
+; CHECK-NEXT: 5.6144: 1053
+; CHECK-NEXT: 6.1: 1095
+; CHECK-NEXT: 6.12801: 0
+; CHECK-NEXT: 6.13825: 241
+; CHECK-NEXT: 7.2: 1095
+; CHECK-NEXT: 7.7170: 241
+; CHECK-NEXT: 7.7682: 0
+; CHECK-NEXT: 8: 1095
+; CHECK-NEXT: 9: swap:1053
+; CHECK-NEXT: 1.1024: 1053
+; CHECK-NEXT: 1.4608: 0
+; CHECK-NEXT: 1.15360: 0
+; CHECK-NEXT: !CFGChecksum: 4294967295
+; CHECK-NEXT: 10: swap:1095
+; CHECK-NEXT: 1.14848: 1095
+; CHECK-NEXT: !CFGChecksum: 4294967295
+; CHECK-NEXT: !CFGChecksum: 563108639284859
+; CHECK-NEXT: quick_sort:4881:2519
+; CHECK-NEXT: 1: 2016
+; CHECK-NEXT: 1.15360: 503
+; CHECK-NEXT: 2: 503
+; CHECK-NEXT: 3: 678
+; CHECK-NEXT: 4: 503
+; CHECK-NEXT: 5: 678
+; CHECK-NEXT: !CFGChecksum: 844480566202114
+
+
+
+; original code:
+; clang -O3 -g -mllvm --enable-fs-discriminator -fdebug-info-for-profiling -fpseudo-probe-for-profiling qsort.c -o a.out
+#include <stdio.h>
+#include <stdlib.h>
+
+void swap(int *a, int *b) {
+ int t = *a;
+ *a = *b;
+ *b = t;
+}
+
+int partition_pivot_last(int* array, int low, int high) {
+ int pivot = array[high];
+ int i = low - 1;
+ for (int j = low; j < high; j++)
+ if (array[j] < pivot)
+ swap(&array[++i], &array[j]);
+ swap(&array[i + 1], &array[high]);
+ return (i + 1);
+}
+
+int partition_pivot_first(int* array, int low, int high) {
+ int pivot = array[low];
+ int i = low + 1;
+ for (int j = low + 1; j <= high; j++)
+ if (array[j] < pivot) { if (j != i) swap(&array[i], &array[j]); i++;}
+ swap(&array[i - 1], &array[low]);
+ return i - 1;
+}
+
+void quick_sort(int* array, int low, int high, int (*partition_func)(int *, int, int)) {
+ if (low < high) {
+ int pi = (*partition_func)(array, low, high);
+ quick_sort(array, low, pi - 1, partition_func);
+ quick_sort(array, pi + 1, high, partition_func);
+ }
+}
+
+int main() {
+ const int size = 200;
+ int sum = 0;
+ int *array = malloc(size * sizeof(int));
+ for(int i = 0; i < 100 * 1000; i++) {
+ for(int j = 0; j < size; j++)
+ array[j] = j % 10 ? rand() % size: j;
+ int (*fptr)(int *, int, int) = i % 3 ? partition_pivot_last : partition_pivot_first;
+ quick_sort(array, 0, size - 1, fptr);
+ sum += array[i % size];
+ }
+ printf("sum=%d\n", sum);
+
+ return 0;
+}
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index 86c0131e10102..9f45167306904 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -480,8 +480,6 @@ static std::string getContextKeyStr(ContextKey *K,
}
void HybridPerfReader::unwindSamples() {
- if (Binary->useFSDiscriminator())
- exitWithError("FS discriminator is not supported in CS profile.");
VirtualUnwinder Unwinder(&SampleCounters, Binary);
for (const auto &Item : AggregatedSamples) {
const PerfSample *Sample = Item.first.getPtr();
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 8e4b912e1b8b1..6632b003a236d 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -122,8 +122,6 @@ ProfileGeneratorBase::create(ProfiledBinary *Binary,
bool ProfileIsCS) {
std::unique_ptr<ProfileGeneratorBase> Generator;
if (ProfileIsCS) {
- if (Binary->useFSDiscriminator())
- exitWithError("FS discriminator is not supported in CS profile.");
Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
} else {
Generator.reset(new ProfileGenerator(Binary, SampleCounters));
@@ -139,8 +137,6 @@ ProfileGeneratorBase::create(ProfiledBinary *Binary, SampleProfileMap &Profiles,
bool ProfileIsCS) {
std::unique_ptr<ProfileGeneratorBase> Generator;
if (ProfileIsCS) {
- if (Binary->useFSDiscriminator())
- exitWithError("FS discriminator is not supported in CS profile.");
Generator.reset(new CSProfileGenerator(Binary, Profiles));
} else {
Generator.reset(new ProfileGenerator(Binary, std::move(Profiles)));
@@ -561,7 +557,8 @@ void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
Binary->getInlineContextForProbe(Probe, FrameVec, true);
FunctionSamples &FunctionProfile =
getLeafProfileAndAddTotalSamples(FrameVec, Count);
- FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
+ FunctionProfile.addBodySamples(Probe->getIndex(), Probe->getDiscriminator(),
+ Count);
if (Probe->isEntry())
FunctionProfile.addHeadSamples(Count);
}
@@ -592,7 +589,9 @@ void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions(
FunctionSamples &FunctionProfile =
getLeafProfileAndAddTotalSamples(FrameVec, 0);
FunctionProfile.addCalledTargetSamples(
- FrameVec.back().Location.LineOffset, 0, CalleeName, Count);
+ FrameVec.back().Location.LineOffset,
+ FrameVec.back().Location.Discriminator,
+ CalleeName, Count);
}
}
}
@@ -1159,7 +1158,8 @@ void CSProfileGenerator::populateBodySamplesWithProbes(
// collected for non-danglie probes. This is for reporting all of the
// zero count probes of the frame later.
FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
- FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
+ FunctionProfile.addBodySamples(Probe->getIndex(), Probe->getDiscriminator(),
+ Count);
FunctionProfile.addTotalSamples(Count);
if (Probe->isEntry()) {
FunctionProfile.addHeadSamples(Count);
@@ -1171,14 +1171,17 @@ void CSProfileGenerator::populateBodySamplesWithProbes(
// context id to infer caller's context id to ensure they share the
// same context prefix.
uint64_t CallerIndex = ContextNode->getCallSiteLoc().LineOffset;
+ uint64_t CallerDiscriminator = ContextNode->getCallSiteLoc().Discriminator;
assert(CallerIndex &&
"Inferred caller's location index shouldn't be zero!");
+ assert(!CallerDiscriminator &&
+ "Callsite probe should not have a discriminator!");
FunctionSamples &CallerProfile =
*getOrCreateFunctionSamples(CallerNode);
CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
- CallerProfile.addBodySamples(CallerIndex, 0, Count);
+ CallerProfile.addBodySamples(CallerIndex, CallerDiscriminator, Count);
CallerProfile.addTotalSamples(Count);
- CallerProfile.addCalledTargetSamples(CallerIndex, 0,
+ CallerProfile.addCalledTargetSamples(CallerIndex, CallerDiscriminator,
ContextNode->getFuncName(), Count);
}
}
@@ -1190,7 +1193,8 @@ void CSProfileGenerator::populateBodySamplesWithProbes(
for (auto &I : FrameSamples) {
for (auto *FunctionProfile : I.second) {
for (auto *Probe : I.first->getProbes()) {
- FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
+ FunctionProfile->addBodySamples(Probe->getIndex(),
+ Probe->getDiscriminator(), 0);
}
}
}
@@ -1213,8 +1217,9 @@ void CSProfileGenerator::populateBoundarySamplesWithProbes(
StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
if (CalleeName.size() == 0)
continue;
- FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
- Count);
+ FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(),
+ CallProbe->getDiscriminator(),
+ CalleeName, Count);
}
}
More information about the llvm-commits
mailing list