[llvm] 345fd0c - [FS-AFDO] Generate pseudo-probe-based profiles with FS-discriminators.

Hongtao Yu via llvm-commits llvm-commits at lists.llvm.org
Wed May 10 11:34:08 PDT 2023


Author: Hongtao Yu
Date: 2023-05-10T11:28:54-07:00
New Revision: 345fd0c10e90e85243265854ca8f6d9efa14f293

URL: https://github.com/llvm/llvm-project/commit/345fd0c10e90e85243265854ca8f6d9efa14f293
DIFF: https://github.com/llvm/llvm-project/commit/345fd0c10e90e85243265854ca8f6d9efa14f293.diff

LOG: [FS-AFDO] Generate pseudo-probe-based profiles with FS-discriminators.

This change enables generating pseudo-probe-based FS-AFDO profiles. The change is straightforward: building on the previous change {D147651}, it simply injects FS-discriminators at the various profile generation spots.

Reviewed By: wenlei

Differential Revision: https://reviews.llvm.org/D147957

Added: 
    llvm/test/tools/llvm-profgen/Inputs/fs-discriminator-probe.raw.prof
    llvm/test/tools/llvm-profgen/fs-discriminator-probe.test

Modified: 
    llvm/include/llvm/ProfileData/SampleProf.h
    llvm/tools/llvm-profgen/PerfReader.cpp
    llvm/tools/llvm-profgen/ProfileGenerator.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index b8456eec1e9fd..f92b993bded59 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -800,13 +800,6 @@ class FunctionSamples {
     return Count;
   }
 
-  sampleprof_error addBodySamplesForProbe(uint32_t Index, uint64_t Num,
-                                          uint64_t Weight = 1) {
-    SampleRecord S;
-    S.addSamples(Num, Weight);
-    return BodySamples[LineLocation(Index, 0)].merge(S, Weight);
-  }
-
   // Accumulate all call target samples to update the body samples.
   void updateCallsiteSamples() {
     for (auto &I : BodySamples) {

diff  --git a/llvm/test/tools/llvm-profgen/Inputs/fs-discriminator-probe.raw.prof b/llvm/test/tools/llvm-profgen/Inputs/fs-discriminator-probe.raw.prof
new file mode 100644
index 0000000000000..84550afdd39f3
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/Inputs/fs-discriminator-probe.raw.prof
@@ -0,0 +1,72 @@
+43
+650-66c:445
+650-675:409
+650-68b:175
+650-6a2:66
+685-68b:199
+685-6a2:205
+688-68b:453
+68d-6a2:782
+6b0-6b7:775
+6b0-6c0:2778
+6b0-6dc:856
+6b0-6f1:1550
+6b9-6c0:463
+6b9-6dc:122
+6b9-6f1:211
+6d4-6dc:2259
+6d4-6f1:1019
+700-71c:508
+720-734:194
+720-73c:751
+720-741:382
+720-751:1226
+722-734:306
+722-73c:2808
+722-751:790
+736-73c:315
+736-741:196
+758-76e:503
+770-77a:849
+770-798:678
+790-798:910
+79a-7ab:1478
+7b0-7b9:885
+7b0-7c3:682
+7bb-7c3:873
+800-81f:9
+824-852:9
+860-860:2542
+865-894:8
+865-8b0:2549
+865-8b5:302
+883-8b0:246
+89a-8b5:8
+27
+ffffffffffc00001->865:2868
+66c->688:458
+675->685:423
+68b->6b9:848
+6a2->79a:1086
+6b7->68d:793
+6c0->6d4:3343
+6dc->6b0:3340
+6f1->6b0:2873
+71c->736:520
+734->758:516
+73c->722:4012
+741->720:600
+751->720:2079
+76e->79a:524
+77a->7bb:879
+798->650:1117
+798->700:516
+7ab->770:1526
+7b9->790:916
+7c3->7b0:1597
+7c3->824:9
+81f->770:10
+852->89a:9
+894->800:9
+8b0->860:2831
+8b5->883:321

diff  --git a/llvm/test/tools/llvm-profgen/fs-discriminator-probe.test b/llvm/test/tools/llvm-profgen/fs-discriminator-probe.test
new file mode 100644
index 0000000000000..ba169b37d0df4
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/fs-discriminator-probe.test
@@ -0,0 +1,139 @@
+; RUN: llvm-profgen --unsymbolized-profile=%S/Inputs/fs-discriminator-probe.raw.prof --binary=%S/Inputs/fs-discriminator-probe.perfbin --output=%t1
+; RUN: llvm-profdata show --sample --show-sec-info-only %t1 | FileCheck %s --check-prefix=CHECK-SECTION
+; RUN: llvm-profdata merge --sample %t1 -o %t2 --text
+; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK
+
+; CHECK-SECTION: ProfileSummarySection - Offset: [[#]], Size: [[#]], Flags: {fs-discriminator}
+
+; CHECK:       partition_pivot_first:29661:2739
+; CHECK-NEXT:   1: 2739
+; CHECK-NEXT:   2.1: 2739
+; CHECK-NEXT:   2.11265: 0
+; CHECK-NEXT:   3: 6457
+; CHECK-NEXT:   4: 508
+; CHECK-NEXT:   5.1: 508
+; CHECK-NEXT:   6.2: 508
+; CHECK-NEXT:   7.3: 2780
+; CHECK-NEXT:   8.4: 0
+; CHECK-NEXT:   9.2: 0
+; CHECK-NEXT:   10: 6457
+; CHECK-NEXT:   11: swap:508
+; CHECK-NEXT:    1.7680: 508
+; CHECK-NEXT:    !CFGChecksum: 4294967295
+; CHECK-NEXT:   12: swap:6457
+; CHECK-NEXT:    1.7168: 6457
+; CHECK-NEXT:    !CFGChecksum: 4294967295
+; CHECK-NEXT:   !CFGChecksum: 563159988274199
+; CHECK-NEXT:  main:16724:1478
+; CHECK-NEXT:   1: 1478
+; CHECK-NEXT:   2.1: 1478
+; CHECK-NEXT:   2.3585: 0
+; CHECK-NEXT:   3: 3105
+; CHECK-NEXT:   5.1: 9
+; CHECK-NEXT:   5.1537: 0
+; CHECK-NEXT:   6: 0
+; CHECK-NEXT:   7: 2859
+; CHECK-NEXT:   8.1: 9
+; CHECK-NEXT:   10.3: 0
+; CHECK-NEXT:   11.2: 0
+; CHECK-NEXT:   12: 0
+; CHECK-NEXT:   13.2: 0
+; CHECK-NEXT:   14: 3105
+; CHECK-NEXT:   15: 1567
+; CHECK-NEXT:   16: 9
+; CHECK-NEXT:   17: 0
+; CHECK-NEXT:   18: 3105
+; CHECK-NEXT:   !CFGChecksum: 1126178599120658
+; CHECK-NEXT:  partition_pivot_last:10497:0
+; CHECK-NEXT:   1: 0
+; CHECK-NEXT:   2.1: 0
+; CHECK-NEXT:   2.6145: 1095
+; CHECK-NEXT:   2.7681: 241
+; CHECK-NEXT:   2.8193: 0
+; CHECK-NEXT:   3: 1095
+; CHECK-NEXT:   4: 0
+; CHECK-NEXT:   4.3072: 1098
+; CHECK-NEXT:   4.12800: 0
+; CHECK-NEXT:   5: 0
+; CHECK-NEXT:   5.5632: 0
+; CHECK-NEXT:   5.6144: 1053
+; CHECK-NEXT:   6.1: 1095
+; CHECK-NEXT:   6.12801: 0
+; CHECK-NEXT:   6.13825: 241
+; CHECK-NEXT:   7.2: 1095
+; CHECK-NEXT:   7.7170: 241
+; CHECK-NEXT:   7.7682: 0
+; CHECK-NEXT:   8: 1095
+; CHECK-NEXT:   9: swap:1053
+; CHECK-NEXT:    1.1024: 1053
+; CHECK-NEXT:    1.4608: 0
+; CHECK-NEXT:    1.15360: 0
+; CHECK-NEXT:    !CFGChecksum: 4294967295
+; CHECK-NEXT:   10: swap:1095
+; CHECK-NEXT:    1.14848: 1095
+; CHECK-NEXT:    !CFGChecksum: 4294967295
+; CHECK-NEXT:   !CFGChecksum: 563108639284859
+; CHECK-NEXT:  quick_sort:4881:2519
+; CHECK-NEXT:   1: 2016
+; CHECK-NEXT:   1.15360: 503
+; CHECK-NEXT:   2: 503
+; CHECK-NEXT:   3: 678
+; CHECK-NEXT:   4: 503
+; CHECK-NEXT:   5: 678
+; CHECK-NEXT:   !CFGChecksum: 844480566202114
+
+
+
+; original code:
+; clang -O3 -g -mllvm --enable-fs-discriminator -fdebug-info-for-profiling -fpseudo-probe-for-profiling qsort.c -o a.out
+#include <stdio.h>
+#include <stdlib.h>
+
+void swap(int *a, int *b) {
+	int t = *a;
+	*a = *b;
+	*b = t;
+}
+
+int partition_pivot_last(int* array, int low, int high) {
+	int pivot = array[high];
+	int i = low - 1;
+	for (int j = low; j < high; j++)
+		if (array[j] < pivot)
+			swap(&array[++i], &array[j]);
+	swap(&array[i + 1], &array[high]);
+	return (i + 1);
+}
+
+int partition_pivot_first(int* array, int low, int high) {
+	int pivot = array[low];
+	int i = low + 1;
+	for (int j = low + 1; j <= high; j++)
+		if (array[j] < pivot) { if (j != i) swap(&array[i], &array[j]); i++;}
+	swap(&array[i - 1], &array[low]);
+	return i - 1;
+}
+
+void quick_sort(int* array, int low, int high, int (*partition_func)(int *, int, int)) {
+	if (low < high) {
+		int pi = (*partition_func)(array, low, high);
+		quick_sort(array, low, pi - 1, partition_func);
+		quick_sort(array, pi + 1, high, partition_func);
+	}
+}
+
+int main() {
+	const int size = 200;
+	int sum = 0;
+	int *array = malloc(size * sizeof(int));
+	for(int i = 0; i < 100 * 1000; i++) {
+		for(int j = 0; j < size; j++)
+			array[j] = j % 10 ? rand() % size: j;
+		int (*fptr)(int *, int, int) = i % 3 ? partition_pivot_last : partition_pivot_first;
+		quick_sort(array, 0, size - 1, fptr);
+		sum += array[i % size];
+	}
+	printf("sum=%d\n", sum);
+
+	return 0;
+}

diff  --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index 86c0131e10102..9f45167306904 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -480,8 +480,6 @@ static std::string getContextKeyStr(ContextKey *K,
 }
 
 void HybridPerfReader::unwindSamples() {
-  if (Binary->useFSDiscriminator())
-    exitWithError("FS discriminator is not supported in CS profile.");
   VirtualUnwinder Unwinder(&SampleCounters, Binary);
   for (const auto &Item : AggregatedSamples) {
     const PerfSample *Sample = Item.first.getPtr();

diff  --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 8e4b912e1b8b1..6632b003a236d 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -122,8 +122,6 @@ ProfileGeneratorBase::create(ProfiledBinary *Binary,
                              bool ProfileIsCS) {
   std::unique_ptr<ProfileGeneratorBase> Generator;
   if (ProfileIsCS) {
-    if (Binary->useFSDiscriminator())
-      exitWithError("FS discriminator is not supported in CS profile.");
     Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
   } else {
     Generator.reset(new ProfileGenerator(Binary, SampleCounters));
@@ -139,8 +137,6 @@ ProfileGeneratorBase::create(ProfiledBinary *Binary, SampleProfileMap &Profiles,
                              bool ProfileIsCS) {
   std::unique_ptr<ProfileGeneratorBase> Generator;
   if (ProfileIsCS) {
-    if (Binary->useFSDiscriminator())
-      exitWithError("FS discriminator is not supported in CS profile.");
     Generator.reset(new CSProfileGenerator(Binary, Profiles));
   } else {
     Generator.reset(new ProfileGenerator(Binary, std::move(Profiles)));
@@ -561,7 +557,8 @@ void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
     Binary->getInlineContextForProbe(Probe, FrameVec, true);
     FunctionSamples &FunctionProfile =
         getLeafProfileAndAddTotalSamples(FrameVec, Count);
-    FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
+    FunctionProfile.addBodySamples(Probe->getIndex(), Probe->getDiscriminator(),
+                                   Count);
     if (Probe->isEntry())
       FunctionProfile.addHeadSamples(Count);
   }
@@ -592,7 +589,9 @@ void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions(
       FunctionSamples &FunctionProfile =
           getLeafProfileAndAddTotalSamples(FrameVec, 0);
       FunctionProfile.addCalledTargetSamples(
-          FrameVec.back().Location.LineOffset, 0, CalleeName, Count);
+          FrameVec.back().Location.LineOffset,
+          FrameVec.back().Location.Discriminator,
+          CalleeName, Count);
     }
   }
 }
@@ -1159,7 +1158,8 @@ void CSProfileGenerator::populateBodySamplesWithProbes(
     // collected for non-danglie probes. This is for reporting all of the
     // zero count probes of the frame later.
     FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
-    FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
+    FunctionProfile.addBodySamples(Probe->getIndex(), Probe->getDiscriminator(),
+                                   Count);
     FunctionProfile.addTotalSamples(Count);
     if (Probe->isEntry()) {
       FunctionProfile.addHeadSamples(Count);
@@ -1171,14 +1171,17 @@ void CSProfileGenerator::populateBodySamplesWithProbes(
         // context id to infer caller's context id to ensure they share the
         // same context prefix.
         uint64_t CallerIndex = ContextNode->getCallSiteLoc().LineOffset;
+        uint64_t CallerDiscriminator = ContextNode->getCallSiteLoc().Discriminator;
         assert(CallerIndex &&
                "Inferred caller's location index shouldn't be zero!");
+        assert(!CallerDiscriminator &&
+               "Callsite probe should not have a discriminator!");
         FunctionSamples &CallerProfile =
             *getOrCreateFunctionSamples(CallerNode);
         CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
-        CallerProfile.addBodySamples(CallerIndex, 0, Count);
+        CallerProfile.addBodySamples(CallerIndex, CallerDiscriminator, Count);
         CallerProfile.addTotalSamples(Count);
-        CallerProfile.addCalledTargetSamples(CallerIndex, 0,
+        CallerProfile.addCalledTargetSamples(CallerIndex, CallerDiscriminator,
                                              ContextNode->getFuncName(), Count);
       }
     }
@@ -1190,7 +1193,8 @@ void CSProfileGenerator::populateBodySamplesWithProbes(
   for (auto &I : FrameSamples) {
     for (auto *FunctionProfile : I.second) {
       for (auto *Probe : I.first->getProbes()) {
-        FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
+        FunctionProfile->addBodySamples(Probe->getIndex(),
+                                        Probe->getDiscriminator(), 0);
       }
     }
   }
@@ -1213,8 +1217,9 @@ void CSProfileGenerator::populateBoundarySamplesWithProbes(
     StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
     if (CalleeName.size() == 0)
       continue;
-    FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
-                                           Count);
+    FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(),
+                                           CallProbe->getDiscriminator(),
+                                           CalleeName, Count);
   }
 }
 


        


More information about the llvm-commits mailing list