[llvm] f1affe8 - [llvm-profgen][CSSPGO] Support count based aggregated type of hybrid perf script

via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 3 17:57:07 PDT 2021


Author: wlei
Date: 2021-08-03T17:56:35-07:00
New Revision: f1affe8dc88a27cb0d436b6ec4bd44ca5da336a9

URL: https://github.com/llvm/llvm-project/commit/f1affe8dc88a27cb0d436b6ec4bd44ca5da336a9
DIFF: https://github.com/llvm/llvm-project/commit/f1affe8dc88a27cb0d436b6ec4bd44ca5da336a9.diff

LOG: [llvm-profgen][CSSPGO] Support count based aggregated type of hybrid perf script

This change integrates a new count-based aggregated type of perf script. The only difference in the format is that an aggregated count is added at the head of the original sample, meaning that the same sample is repeated the given number of times. This is used to reduce the perf script size.
e.g.
```
2
	          4005dc
	          400634
	          400684
	    7f68c5788793
 0x4005c8/0x4005dc/P/-/-/0  ....
```
Implemented by a dedicated PerfReader `AggregatedHybridPerfReader`.

Differential Revision: https://reviews.llvm.org/D107192

Added: 
    llvm/test/tools/llvm-profgen/Inputs/noinline-cs-noprobe.aggperfscript
    llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.aggperfscript

Modified: 
    llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test
    llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test
    llvm/tools/llvm-profgen/PerfReader.cpp
    llvm/tools/llvm-profgen/PerfReader.h

Removed: 
    


################################################################################
diff --git a/llvm/test/tools/llvm-profgen/Inputs/noinline-cs-noprobe.aggperfscript b/llvm/test/tools/llvm-profgen/Inputs/noinline-cs-noprobe.aggperfscript
new file mode 100644
index 0000000000000..fc28005340fcf
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/Inputs/noinline-cs-noprobe.aggperfscript
@@ -0,0 +1,11 @@
+2
+	          4005dc
+	          400634
+	          400684
+	    7f68c5788793
+ 0x4005c8/0x4005dc 0x40062f/0x4005b0 0x400645/0x4005ff 0x400637/0x400645 0x4005e9/0x400634 0x4005d7/0x4005e5 0x40062f/0x4005b0 0x400645/0x4005ff 0x400637/0x400645 0x4005e9/0x400634 0x4005d7/0x4005e5 0x40062f/0x4005b0 0x400645/0x4005ff 0x400637/0x400645 0x4005e9/0x400634 0x4005c8/0x4005dc
+2
+	          4005b0
+	          400684
+	    7f68c5788793
+ 0x40062f/0x4005b0 0x400645/0x4005ff 0x400637/0x400645 0x4005e9/0x400634 0x4005c8/0x4005dc 0x40062f/0x4005b0 0x400645/0x4005ff 0x400637/0x400645 0x4005e9/0x400634 0x4005d7/0x4005e5 0x40062f/0x4005b0 0x400645/0x4005ff 0x400637/0x400645 0x4005e9/0x400634 0x4005d7/0x4005e5 0x40062f/0x4005b0

diff --git a/llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.aggperfscript b/llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.aggperfscript
new file mode 100644
index 0000000000000..36e3f784348d4
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.aggperfscript
@@ -0,0 +1,18 @@
+1
+	          20179e
+	          2017f9
+	    7f83e84e7793
+	5541f689495641d7
+ 0x2017cf/0x20179e/P/-/-/0  0x20177f/0x2017c4/P/-/-/0  0x2017bf/0x201760/P/-/-/0  0x2017cf/0x20179e/P/-/-/0  0x20177f/0x2017c4/P/-/-/0  0x2017bf/0x201760/P/-/-/0  0x2017cf/0x20179e/P/-/-/0  0x20177f/0x2017c4/P/-/-/0  0x2017bf/0x201760/P/-/-/0  0x2017cf/0x20179e/P/-/-/0  0x20177f/0x2017c4/P/-/-/0  0x2017bf/0x201760/P/-/-/0  0x2017cf/0x20179e/P/-/-/0  0x20177f/0x2017c4/P/-/-/0  0x2017bf/0x201760/P/-/-/0  0x2017cf/0x20179e/P/-/-/0
+1
+	          2017c4
+	          2017f9
+	    7f83e84e7793
+	5541f689495641d7
+ 0x20177f/0x2017c4/P/-/-/0  0x2017bf/0x201760/P/-/-/0  0x2017cf/0x20179e/P/-/-/0  0x20177f/0x2017c4/P/-/-/0  0x2017bf/0x201760/P/-/-/0  0x2017cf/0x20179e/P/-/-/0  0x20177f/0x2017c4/P/-/-/0  0x2017bf/0x201760/P/-/-/0  0x2017cf/0x20179e/P/-/-/0  0x20177f/0x2017c4/P/-/-/0  0x2017bf/0x201760/P/-/-/0  0x2017cf/0x20179e/P/-/-/0  0x20177f/0x2017c4/P/-/-/0  0x2017bf/0x201760/P/-/-/0  0x2017cf/0x20179e/P/-/-/0  0x20177f/0x2017c4/P/-/-/0
+1
+	          2017c4
+	          2017f9
+	    7f83e84e7793
+	5541f689495641d7
+ 0x20177f/0x2017c4/P/-/-/0  0x2017bf/0x201760/P/-/-/0  0x2017cf/0x20179e/P/-/-/0  0x20177f/0x2017c4/P/-/-/0  0x2017bf/0x201760/P/-/-/0  0x2017cf/0x20179e/P/-/-/0  0x20177f/0x2017c4/P/-/-/0  0x2017bf/0x201760/P/-/-/0  0x2017cf/0x20179e/P/-/-/0  0x20177f/0x2017c4/P/-/-/0  0x2017bf/0x201760/P/-/-/0  0x2017cf/0x20179e/P/-/-/0  0x20177f/0x2017c4/P/-/-/0  0x2017bf/0x201760/P/-/-/0  0x2017cf/0x20179e/P/-/-/0  0x20177f/0x2017c4/P/-/-/0

diff --git a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test
index 9037e0b01fc15..c6d1a34a3164d 100644
--- a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test
+++ b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test
@@ -1,5 +1,40 @@
 ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 | FileCheck %s --check-prefix=CHECK-UNWINDER
 ; RUN: FileCheck %s --input-file %t
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.aggperfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 | FileCheck %s --check-prefix=CHECK-AGG-UNWINDER
+; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-AGG
+
+; CHECK-AGG:[main:1 @ foo]:108:0
+; CHECK-AGG: 2: 6
+; CHECK-AGG: 3: 6 bar:6
+; CHECK-AGG:[main:1 @ foo:3 @ bar]:100:6
+; CHECK-AGG: 0: 6
+; CHECK-AGG: 1: 6
+; CHECK-AGG: 2: 4
+; CHECK-AGG: 4: 2
+; CHECK-AGG: 5: 6
+
+; CHECK-AGG-UNWINDER: Binary(noinline-cs-noprobe.perfbin)'s Range Counter:
+; CHECK-AGG-UNWINDER: main:1 @ foo
+; CHECK-AGG-UNWINDER:   (5ff, 62f): 6
+; CHECK-AGG-UNWINDER:   (634, 637): 6
+; CHECK-AGG-UNWINDER:   (645, 645): 6
+; CHECK-AGG-UNWINDER: main:1 @ foo:3 @ bar
+; CHECK-AGG-UNWINDER:   (5b0, 5c8): 2
+; CHECK-AGG-UNWINDER:   (5b0, 5d7): 4
+; CHECK-AGG-UNWINDER:   (5dc, 5e9): 2
+; CHECK-AGG-UNWINDER:   (5e5, 5e9): 4
+
+; CHECK-AGG-UNWINDER: Binary(noinline-cs-noprobe.perfbin)'s Branch Counter:
+; CHECK-AGG-UNWINDER: main:1 @ foo
+; CHECK-AGG-UNWINDER:   (62f, 5b0): 6
+; CHECK-AGG-UNWINDER:   (637, 645): 6
+; CHECK-AGG-UNWINDER:   (645, 5ff): 6
+; CHECK-AGG-UNWINDER: main:1 @ foo:3 @ bar
+; CHECK-AGG-UNWINDER:   (5c8, 5dc): 4
+; CHECK-AGG-UNWINDER:   (5d7, 5e5): 4
+; CHECK-AGG-UNWINDER:   (5e9, 634): 6
+
+
 
 ; CHECK:[main:1 @ foo]:54:0
 ; CHECK: 2: 3
@@ -37,7 +72,6 @@
 
 
 
-
 ; original code:
 ; clang -O0 -g test.c -o a.out
 #include <stdio.h>

diff --git a/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test b/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test
index 4e434467df143..32da466e18536 100644
--- a/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test
+++ b/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test
@@ -1,5 +1,8 @@
 ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 | FileCheck %s --check-prefix=CHECK-UNWINDER
 ; RUN: FileCheck %s --input-file %t
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.aggperfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 | FileCheck %s --check-prefix=CHECK-UNWINDER
+; RUN: FileCheck %s --input-file %t
+
 
 ; CHECK:     [main:2 @ foo]:75:0
 ; CHECK-NEXT: 1: 0

diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index ff6cd8efe5721..0a32bbcaa8061 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -622,7 +622,7 @@ bool PerfReaderBase::extractCallstack(TraceStream &TraceIt,
          !Binary->addressInPrologEpilog(CallStack.front());
 }
 
-void HybridPerfReader::parseSample(TraceStream &TraceIt) {
+void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) {
   // The raw hybird sample started with call stack in FILO order and followed
   // intermediately by LBR sample
   // e.g.
@@ -659,7 +659,7 @@ void HybridPerfReader::parseSample(TraceStream &TraceIt) {
       Sample->CallStack.front() = Sample->LBRStack[0].Target;
       // Record samples by aggregation
       Sample->genHashCode();
-      AggregatedSamples[Hashable<PerfSample>(Sample)]++;
+      AggregatedSamples[Hashable<PerfSample>(Sample)] += Count;
     }
   } else {
     // LBR sample is encoded in single line after stack sample
@@ -667,6 +667,21 @@ void HybridPerfReader::parseSample(TraceStream &TraceIt) {
   }
 }
 
+uint64_t PerfReaderBase::parseAggregatedCount(TraceStream &TraceIt) {
+  // The aggregated count is optional, so do not skip the line and return 1 if
+  // it's unmatched
+  uint64_t Count = 1;
+  if (!TraceIt.getCurrentLine().getAsInteger(10, Count))
+    TraceIt.advance();
+  return Count;
+}
+
+void PerfReaderBase::parseSample(TraceStream &TraceIt) {
+  uint64_t Count = parseAggregatedCount(TraceIt);
+  assert(Count >= 1 && "Aggregated count should be >= 1!");
+  parseSample(TraceIt, Count);
+}
+
 void PerfReaderBase::parseMMap2Event(TraceStream &TraceIt) {
   // Parse a line like:
   //  PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0

diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h
index 630d14a0c44c2..f1a866d1e6b58 100644
--- a/llvm/tools/llvm-profgen/PerfReader.h
+++ b/llvm/tools/llvm-profgen/PerfReader.h
@@ -637,9 +637,13 @@ class PerfReaderBase {
   bool extractLBRStack(TraceStream &TraceIt,
                        SmallVectorImpl<LBREntry> &LBRStack,
                        ProfiledBinary *Binary);
+  uint64_t parseAggregatedCount(TraceStream &TraceIt);
   // Parse one sample from multiple perf lines, override this for different
   // sample type
-  virtual void parseSample(TraceStream &TraceIt) = 0;
+  void parseSample(TraceStream &TraceIt);
+  // An aggregated count is given to indicate how many times the sample is
+  // repeated.
+  virtual void parseSample(TraceStream &TraceIt, uint64_t Count) = 0;
   // Post process the profile after trace aggregation, we will do simple range
   // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).
   virtual void generateRawProfile() = 0;
@@ -671,7 +675,7 @@ class HybridPerfReader : public PerfReaderBase {
     PerfType = PERF_LBR_STACK;
   };
   // Parse the hybrid sample including the call and LBR line
-  void parseSample(TraceStream &TraceIt) override;
+  void parseSample(TraceStream &TraceIt, uint64_t Count) override;
   void generateRawProfile() override;
 
 private:


        


More information about the llvm-commits mailing list