[llvm] 27cb370 - [llvm-profgen] Trim cold function profiles for non-CS AutoFDO
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 8 12:21:19 PST 2021
Author: wlei
Date: 2021-12-08T12:20:50-08:00
New Revision: 27cb3707db4b424fbe4d94a6822616ff749de12a
URL: https://github.com/llvm/llvm-project/commit/27cb3707db4b424fbe4d94a6822616ff749de12a
DIFF: https://github.com/llvm/llvm-project/commit/27cb3707db4b424fbe4d94a6822616ff749de12a.diff
LOG: [llvm-profgen] Trim cold function profiles for non-CS AutoFDO
This change allows trimming a function profile that is considered cold for baseline (non-CS) AutoFDO. We reuse the cold threshold from `ProfileSummaryBuilder::getColdCountThreshold(..)`, which can be set either by percent (--profile-summary-cutoff-cold) or by value (--profile-summary-cold-count).
Reviewed By: hoy, wenlei
Differential Revision: https://reviews.llvm.org/D113785
Added:
llvm/test/tools/llvm-profgen/Inputs/cold-profile-trimming.raw.prof
llvm/test/tools/llvm-profgen/cold-profile-trimming.test
Modified:
llvm/test/tools/llvm-profgen/cs-preinline.test
llvm/test/tools/llvm-profgen/merge-cold-profile.test
llvm/tools/llvm-profgen/ProfileGenerator.cpp
llvm/tools/llvm-profgen/ProfileGenerator.h
Removed:
################################################################################
diff --git a/llvm/test/tools/llvm-profgen/Inputs/cold-profile-trimming.raw.prof b/llvm/test/tools/llvm-profgen/Inputs/cold-profile-trimming.raw.prof
new file mode 100644
index 0000000000000..9230ac16016ea
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/Inputs/cold-profile-trimming.raw.prof
@@ -0,0 +1,55 @@
+27
+400540-400540:10
+400650-40066d:31
+400686-400689:3
+40068b-4006a2:30
+4006b0-4006b7:3
+4006b0-4006bf:60
+4006b0-4006c8:6
+4006d0-4006ea:51
+4006d0-400700:4
+4006ec-400700:30
+400710-40072f:5
+400740-400753:3
+400740-40075b:9
+400740-40076e:14
+400743-400753:3
+400743-40075b:43
+400743-40076e:11
+400755-40075b:4
+400770-400788:6
+400790-400792:12
+400790-4007a6:12
+4007a8-4007b8:11
+4007bd-4007ca:12
+4007cf-4007d7:12
+4007d7-4007d7:12
+400870-400870:12
+400875-4008bf:10
+26
+40066d->400686:3
+400675->400682:1
+400689->4006b9:4
+4006a2->4007a8:6
+4006b7->40068b:3
+4006bf->4006d0:9
+4006c8->4006b0:7
+4006ca->4006ec:3
+4006ea->4006b0:5
+400700->4006b0:7
+40072f->400755:5
+400753->400770:6
+40075b->400743:58
+40075f->400740:2
+40076e->400740:25
+400788->4007a8:6
+400792->4007d7:12
+4007a6->400650:7
+4007a6->400710:5
+4007b8->400790:12
+4007ca->400790:12
+4007d7->4007bd:12
+4007d7->4007cf:13
+40082f->400790:1
+400870->400540:12
+4008bf->400870:15
diff --git a/llvm/test/tools/llvm-profgen/cold-profile-trimming.test b/llvm/test/tools/llvm-profgen/cold-profile-trimming.test
new file mode 100644
index 0000000000000..bee98802e8026
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/cold-profile-trimming.test
@@ -0,0 +1,68 @@
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/cold-profile-trimming.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0 --trim-cold-profile=0
+; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-NOTRIM
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/cold-profile-trimming.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0 --trim-cold-profile=1 --profile-summary-cold-count=100
+; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-TRIM
+
+;CHECK-NOTRIM: partition_pivot_last:1091:7
+;CHECK-NOTRIM: partition_pivot_first:365:5
+;CHECK-NOTRIM: quick_sort:83:25
+;CHECK-NOTRIM: main:52:0
+
+;CHECK-TRIM: partition_pivot_last:1091:7
+;CHECK-TRIM: partition_pivot_first:365:5
+;CHECK-TRIM-NOT: quick_sort:83:25
+;CHECK-TRIM-NOT: main:52:0
+
+; original code:
+; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out
+#include <stdio.h>
+#include <stdlib.h>
+
+void swap(int *a, int *b) {
+ int t = *a;
+ *a = *b;
+ *b = t;
+}
+
+int partition_pivot_last(int* array, int low, int high) {
+ int pivot = array[high];
+ int i = low - 1;
+ for (int j = low; j < high; j++)
+ if (array[j] < pivot)
+ swap(&array[++i], &array[j]);
+ swap(&array[i + 1], &array[high]);
+ return (i + 1);
+}
+
+int partition_pivot_first(int* array, int low, int high) {
+ int pivot = array[low];
+ int i = low + 1;
+ for (int j = low + 1; j <= high; j++)
+ if (array[j] < pivot) { if (j != i) swap(&array[i], &array[j]); i++;}
+ swap(&array[i - 1], &array[low]);
+ return i - 1;
+}
+
+void quick_sort(int* array, int low, int high, int (*partition_func)(int *, int, int)) {
+ if (low < high) {
+ int pi = (*partition_func)(array, low, high);
+ quick_sort(array, low, pi - 1, partition_func);
+ quick_sort(array, pi + 1, high, partition_func);
+ }
+}
+
+int main() {
+ const int size = 200;
+ int sum = 0;
+ int *array = malloc(size * sizeof(int));
+ for(int i = 0; i < 100 * 1000; i++) {
+ for(int j = 0; j < size; j++)
+ array[j] = j % 10 ? rand() % size: j;
+ int (*fptr)(int *, int, int) = i % 3 ? partition_pivot_last : partition_pivot_first;
+ quick_sort(array, 0, size - 1, fptr);
+ sum += array[i % size];
+ }
+ printf("sum=%d\n", sum);
+
+ return 0;
+}
diff --git a/llvm/test/tools/llvm-profgen/cs-preinline.test b/llvm/test/tools/llvm-profgen/cs-preinline.test
index 3d7d59ae214ff..94ef76207168a 100644
--- a/llvm/test/tools/llvm-profgen/cs-preinline.test
+++ b/llvm/test/tools/llvm-profgen/cs-preinline.test
@@ -11,7 +11,7 @@
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-NO-PREINL
; Test cold profile trimming. Only base profiles should be dropped.
-; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1 --csprof-trim-cold-context=1 --profile-summary-hot-count=250
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1 --trim-cold-profile=1 --profile-summary-hot-count=250
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-TRIM
diff --git a/llvm/test/tools/llvm-profgen/merge-cold-profile.test b/llvm/test/tools/llvm-profgen/merge-cold-profile.test
index a012ead9ac57b..120d83e35d7d1 100644
--- a/llvm/test/tools/llvm-profgen/merge-cold-profile.test
+++ b/llvm/test/tools/llvm-profgen/merge-cold-profile.test
@@ -2,8 +2,8 @@
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t1 --compress-recursion=-1 --profile-summary-hot-count=8
; RUN: FileCheck %s --input-file %t1
-; Test --csprof-trim-cold-context=0
-; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --csprof-trim-cold-context=0
+; Test --trim-cold-profile=0
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --trim-cold-profile=0
; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-KEEP-COLD
; Test --csprof-merge-cold-context=0
@@ -11,7 +11,7 @@
; RUN: FileCheck %s --input-file %t3 --check-prefix=CHECK-UNMERGED
; Test --csprof-frame-depth-for-cold-context
-; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --csprof-trim-cold-context=0 --csprof-max-cold-context-depth=2
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --trim-cold-profile=0 --csprof-max-cold-context-depth=2
; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-COLD-CONTEXT-LENGTH
; CHECK: [fa]:14:4
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 0b90352bc4457..c40604d8a9f0b 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -50,17 +50,17 @@ static cl::opt<int32_t, true> RecursionCompression(
cl::Hidden,
cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
+static cl::opt<bool>
+ TrimColdProfile("trim-cold-profile", cl::init(false), cl::ZeroOrMore,
+ cl::desc("If the total count of the profile is smaller "
+ "than threshold, it will be trimmed."));
+
static cl::opt<bool> CSProfMergeColdContext(
"csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
cl::desc("If the total count of context profile is smaller than "
"the threshold, it will be merged into context-less base "
"profile."));
-static cl::opt<bool> CSProfTrimColdContext(
- "csprof-trim-cold-context", cl::init(false), cl::ZeroOrMore,
- cl::desc("If the total count of the profile after all merge is done "
- "is still smaller than threshold, it will be trimmed."));
-
static cl::opt<uint32_t> CSProfMaxColdContextDepth(
"csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore,
cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
@@ -378,9 +378,27 @@ void ProfileGenerator::generateProfile() {
void ProfileGenerator::postProcessProfiles() {
computeSummaryAndThreshold();
+ trimColdProfiles(ProfileMap, ColdCountThreshold);
calculateAndShowDensity(ProfileMap);
}
+void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
+ uint64_t ColdCntThreshold) {
+ if (!TrimColdProfile)
+ return;
+
+ // Move cold profiles into a tmp container.
+ std::vector<SampleContext> ColdProfiles;
+ for (const auto &I : ProfileMap) {
+ if (I.second.getTotalSamples() < ColdCntThreshold)
+ ColdProfiles.emplace_back(I.first);
+ }
+
+ // Remove the cold profile from ProfileMap.
+ for (const auto &I : ColdProfiles)
+ ProfileMap.erase(I);
+}
+
void ProfileGenerator::generateLineNumBasedProfile() {
assert(SampleCounters.size() == 1 &&
"Must have one entry for profile generation.");
@@ -732,10 +750,10 @@ void CSProfileGenerator::postProcessProfiles() {
}
// Trim and merge cold context profile using cold threshold above.
- if (CSProfTrimColdContext || CSProfMergeColdContext) {
+ if (TrimColdProfile || CSProfMergeColdContext) {
SampleContextTrimmer(ProfileMap)
.trimAndMergeColdContextProfiles(
- HotCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext,
+ HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
CSProfMaxColdContextDepth, EnableCSPreInliner);
}
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index c4b77aa095491..de13f7d840a70 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -129,6 +129,8 @@ class ProfileGenerator : public ProfileGeneratorBase {
void
populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters);
void postProcessProfiles();
+ void trimColdProfiles(const SampleProfileMap &Profiles,
+ uint64_t ColdCntThreshold);
};
using ProbeCounterMap =
More information about the llvm-commits mailing list