[llvm] 27cb370 - [llvm-profgen] Trim cold function profiles for non-CS AutoFDO

via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 8 12:21:19 PST 2021


Author: wlei
Date: 2021-12-08T12:20:50-08:00
New Revision: 27cb3707db4b424fbe4d94a6822616ff749de12a

URL: https://github.com/llvm/llvm-project/commit/27cb3707db4b424fbe4d94a6822616ff749de12a
DIFF: https://github.com/llvm/llvm-project/commit/27cb3707db4b424fbe4d94a6822616ff749de12a.diff

LOG: [llvm-profgen] Trim cold function profiles for non-CS AutoFDO

This change allows llvm-profgen to trim a function profile when it is considered cold for baseline (non-CS) AutoFDO. We reuse the cold threshold from `ProfileSummaryBuilder::getColdCountThreshold(..)`, which can be set by percent (--profile-summary-cutoff-cold) or by value (--profile-summary-cold-count).

Reviewed By: hoy, wenlei

Differential Revision: https://reviews.llvm.org/D113785

Added: 
    llvm/test/tools/llvm-profgen/Inputs/cold-profile-trimming.raw.prof
    llvm/test/tools/llvm-profgen/cold-profile-trimming.test

Modified: 
    llvm/test/tools/llvm-profgen/cs-preinline.test
    llvm/test/tools/llvm-profgen/merge-cold-profile.test
    llvm/tools/llvm-profgen/ProfileGenerator.cpp
    llvm/tools/llvm-profgen/ProfileGenerator.h

Removed: 
    


################################################################################
diff  --git a/llvm/test/tools/llvm-profgen/Inputs/cold-profile-trimming.raw.prof b/llvm/test/tools/llvm-profgen/Inputs/cold-profile-trimming.raw.prof
new file mode 100644
index 0000000000000..9230ac16016ea
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/Inputs/cold-profile-trimming.raw.prof
@@ -0,0 +1,55 @@
+27
+400540-400540:10
+400650-40066d:31
+400686-400689:3
+40068b-4006a2:30
+4006b0-4006b7:3
+4006b0-4006bf:60
+4006b0-4006c8:6
+4006d0-4006ea:51
+4006d0-400700:4
+4006ec-400700:30
+400710-40072f:5
+400740-400753:3
+400740-40075b:9
+400740-40076e:14
+400743-400753:3
+400743-40075b:43
+400743-40076e:11
+400755-40075b:4
+400770-400788:6
+400790-400792:12
+400790-4007a6:12
+4007a8-4007b8:11
+4007bd-4007ca:12
+4007cf-4007d7:12
+4007d7-4007d7:12
+400870-400870:12
+400875-4008bf:10
+26
+40066d->400686:3
+400675->400682:1
+400689->4006b9:4
+4006a2->4007a8:6
+4006b7->40068b:3
+4006bf->4006d0:9
+4006c8->4006b0:7
+4006ca->4006ec:3
+4006ea->4006b0:5
+400700->4006b0:7
+40072f->400755:5
+400753->400770:6
+40075b->400743:58
+40075f->400740:2
+40076e->400740:25
+400788->4007a8:6
+400792->4007d7:12
+4007a6->400650:7
+4007a6->400710:5
+4007b8->400790:12
+4007ca->400790:12
+4007d7->4007bd:12
+4007d7->4007cf:13
+40082f->400790:1
+400870->400540:12
+4008bf->400870:15

diff  --git a/llvm/test/tools/llvm-profgen/cold-profile-trimming.test b/llvm/test/tools/llvm-profgen/cold-profile-trimming.test
new file mode 100644
index 0000000000000..bee98802e8026
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/cold-profile-trimming.test
@@ -0,0 +1,68 @@
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/cold-profile-trimming.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0 --trim-cold-profile=0
+; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-NOTRIM
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/cold-profile-trimming.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0 --trim-cold-profile=1 --profile-summary-cold-count=100
+; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-TRIM
+
+;CHECK-NOTRIM: partition_pivot_last:1091:7
+;CHECK-NOTRIM: partition_pivot_first:365:5
+;CHECK-NOTRIM: quick_sort:83:25
+;CHECK-NOTRIM: main:52:0
+
+;CHECK-TRIM: partition_pivot_last:1091:7
+;CHECK-TRIM: partition_pivot_first:365:5
+;CHECK-TRIM-NOT: quick_sort:83:25
+;CHECK-TRIM-NOT: main:52:0
+
+; original code:
+; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out
+#include <stdio.h>
+#include <stdlib.h>
+
+void swap(int *a, int *b) {
+	int t = *a;
+	*a = *b;
+	*b = t;
+}
+
+int partition_pivot_last(int* array, int low, int high) {
+	int pivot = array[high];
+	int i = low - 1;
+	for (int j = low; j < high; j++)
+		if (array[j] < pivot)
+			swap(&array[++i], &array[j]);
+	swap(&array[i + 1], &array[high]);
+	return (i + 1);
+}
+
+int partition_pivot_first(int* array, int low, int high) {
+	int pivot = array[low];
+	int i = low + 1;
+	for (int j = low + 1; j <= high; j++)
+		if (array[j] < pivot) { if (j != i) swap(&array[i], &array[j]); i++;}
+	swap(&array[i - 1], &array[low]);
+	return i - 1;
+}
+
+void quick_sort(int* array, int low, int high, int (*partition_func)(int *, int, int)) {
+	if (low < high) {
+		int pi = (*partition_func)(array, low, high);
+		quick_sort(array, low, pi - 1, partition_func);
+		quick_sort(array, pi + 1, high, partition_func);
+	}
+}
+
+int main() {
+	const int size = 200;
+	int sum = 0;
+	int *array = malloc(size * sizeof(int));
+	for(int i = 0; i < 100 * 1000; i++) {
+		for(int j = 0; j < size; j++)
+			array[j] = j % 10 ? rand() % size: j;
+		int (*fptr)(int *, int, int) = i % 3 ? partition_pivot_last : partition_pivot_first;
+		quick_sort(array, 0, size - 1, fptr);
+		sum += array[i % size];
+	}
+	printf("sum=%d\n", sum);
+
+	return 0;
+}

diff  --git a/llvm/test/tools/llvm-profgen/cs-preinline.test b/llvm/test/tools/llvm-profgen/cs-preinline.test
index 3d7d59ae214ff..94ef76207168a 100644
--- a/llvm/test/tools/llvm-profgen/cs-preinline.test
+++ b/llvm/test/tools/llvm-profgen/cs-preinline.test
@@ -11,7 +11,7 @@
 ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-NO-PREINL
 
 ; Test cold profile trimming. Only base profiles should be dropped.
-; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1  --csprof-trim-cold-context=1 --profile-summary-hot-count=250
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1  --trim-cold-profile=1 --profile-summary-hot-count=250
 
 ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-TRIM
 

diff  --git a/llvm/test/tools/llvm-profgen/merge-cold-profile.test b/llvm/test/tools/llvm-profgen/merge-cold-profile.test
index a012ead9ac57b..120d83e35d7d1 100644
--- a/llvm/test/tools/llvm-profgen/merge-cold-profile.test
+++ b/llvm/test/tools/llvm-profgen/merge-cold-profile.test
@@ -2,8 +2,8 @@
 ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t1 --compress-recursion=-1 --profile-summary-hot-count=8
 ; RUN: FileCheck %s --input-file %t1
 
-; Test --csprof-trim-cold-context=0
-; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --csprof-trim-cold-context=0
+; Test --trim-cold-profile=0
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --trim-cold-profile=0
 ; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-KEEP-COLD
 
 ; Test --csprof-merge-cold-context=0
@@ -11,7 +11,7 @@
 ; RUN: FileCheck %s --input-file %t3 --check-prefix=CHECK-UNMERGED
 
 ; Test --csprof-frame-depth-for-cold-context
-; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --csprof-trim-cold-context=0 --csprof-max-cold-context-depth=2
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --trim-cold-profile=0 --csprof-max-cold-context-depth=2
 ; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-COLD-CONTEXT-LENGTH
 
 ; CHECK:     [fa]:14:4

diff  --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 0b90352bc4457..c40604d8a9f0b 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -50,17 +50,17 @@ static cl::opt<int32_t, true> RecursionCompression(
     cl::Hidden,
     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
 
+static cl::opt<bool>
+    TrimColdProfile("trim-cold-profile", cl::init(false), cl::ZeroOrMore,
+                    cl::desc("If the total count of the profile is smaller "
+                             "than threshold, it will be trimmed."));
+
 static cl::opt<bool> CSProfMergeColdContext(
     "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
     cl::desc("If the total count of context profile is smaller than "
              "the threshold, it will be merged into context-less base "
              "profile."));
 
-static cl::opt<bool> CSProfTrimColdContext(
-    "csprof-trim-cold-context", cl::init(false), cl::ZeroOrMore,
-    cl::desc("If the total count of the profile after all merge is done "
-             "is still smaller than threshold, it will be trimmed."));
-
 static cl::opt<uint32_t> CSProfMaxColdContextDepth(
     "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore,
     cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
@@ -378,9 +378,27 @@ void ProfileGenerator::generateProfile() {
 
 void ProfileGenerator::postProcessProfiles() {
   computeSummaryAndThreshold();
+  trimColdProfiles(ProfileMap, ColdCountThreshold);
   calculateAndShowDensity(ProfileMap);
 }
 
+void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
+                                        uint64_t ColdCntThreshold) {
+  if (!TrimColdProfile)
+    return;
+
+  // Move cold profiles into a tmp container.
+  std::vector<SampleContext> ColdProfiles;
+  for (const auto &I : ProfileMap) {
+    if (I.second.getTotalSamples() < ColdCntThreshold)
+      ColdProfiles.emplace_back(I.first);
+  }
+
+  // Remove the cold profile from ProfileMap.
+  for (const auto &I : ColdProfiles)
+    ProfileMap.erase(I);
+}
+
 void ProfileGenerator::generateLineNumBasedProfile() {
   assert(SampleCounters.size() == 1 &&
          "Must have one entry for profile generation.");
@@ -732,10 +750,10 @@ void CSProfileGenerator::postProcessProfiles() {
   }
 
   // Trim and merge cold context profile using cold threshold above.
-  if (CSProfTrimColdContext || CSProfMergeColdContext) {
+  if (TrimColdProfile || CSProfMergeColdContext) {
     SampleContextTrimmer(ProfileMap)
         .trimAndMergeColdContextProfiles(
-            HotCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext,
+            HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
             CSProfMaxColdContextDepth, EnableCSPreInliner);
   }
 

diff  --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index c4b77aa095491..de13f7d840a70 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -129,6 +129,8 @@ class ProfileGenerator : public ProfileGeneratorBase {
   void
   populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters);
   void postProcessProfiles();
+  void trimColdProfiles(const SampleProfileMap &Profiles,
+                        uint64_t ColdCntThreshold);
 };
 
 using ProbeCounterMap =


        


More information about the llvm-commits mailing list