[llvm] [CSSPGO]Add a flag to limit unsymbolized context depth (PR #121531)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 2 16:41:49 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-pgo
Author: Lei Wang (wlei-llvm)
<details>
<summary>Changes</summary>
Add a new flag (`--csprof-max-unsymbolized-context-depth`) that limits only the unsymbolized context depth. Currently, `--csprof-max-context-depth` applies to both symbolized and unsymbolized profile contexts, and there are scenarios where `--csprof-max-context-depth` is not flexible enough. For example, if we want to limit the context but still keep all the inlinings from the last frame, we can set `--csprof-max-unsymbolized-context-depth` to a value >= 1.
---
Full diff: https://github.com/llvm/llvm-project/pull/121531.diff
3 Files Affected:
- (added) llvm/test/tools/llvm-profgen/context-depth.test (+125)
- (modified) llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test (-20)
- (modified) llvm/tools/llvm-profgen/PerfReader.cpp (+12-1)
``````````diff
diff --git a/llvm/test/tools/llvm-profgen/context-depth.test b/llvm/test/tools/llvm-profgen/context-depth.test
new file mode 100644
index 00000000000000..4eaa5fa1eae9d8
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/context-depth.test
@@ -0,0 +1,125 @@
+; Test --csprof-max-context-depth and --csprof-max-unsymbolized-context-depth
+
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0 --profile-summary-hot-count=0 --csprof-max-context-depth=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0
+; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-MAX-CTX-DEPTH
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0 --profile-summary-hot-count=0 --csprof-max-unsymbolized-context-depth=2 --csspgo-preinliner=0 --gen-cs-nested-profile=0 --skip-symbolization
+; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-MAX-UNSYM-CTX-DEPTH
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0 --profile-summary-hot-count=0 --csprof-max-unsymbolized-context-depth=2 --csspgo-preinliner=0 --gen-cs-nested-profile=0
+; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-MAX-UNSYM-CTX-DEPTH-PROF
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0 --profile-summary-hot-count=0 --csprof-max-unsymbolized-context-depth=2 --csprof-max-context-depth=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0
+; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-MAX-CTX-DEPTH
+
+
+; CHECK-MAX-CTX-DEPTH: [fb]:19:6
+; CHECK-MAX-CTX-DEPTH: 1: 6
+; CHECK-MAX-CTX-DEPTH: 2: 3
+; CHECK-MAX-CTX-DEPTH: 3: 3
+; CHECK-MAX-CTX-DEPTH: 4: 0
+; CHECK-MAX-CTX-DEPTH: 5: 4 fb:4
+; CHECK-MAX-CTX-DEPTH: 6: 3 fa:3
+; CHECK-MAX-CTX-DEPTH: !CFGChecksum: 563022570642068
+; CHECK-MAX-CTX-DEPTH: [fa]:14:4
+; CHECK-MAX-CTX-DEPTH: 1: 4
+; CHECK-MAX-CTX-DEPTH: 3: 4
+; CHECK-MAX-CTX-DEPTH: 4: 2
+; CHECK-MAX-CTX-DEPTH: 5: 1
+; CHECK-MAX-CTX-DEPTH: 6: 0
+; CHECK-MAX-CTX-DEPTH: 7: 2 fb:2
+; CHECK-MAX-CTX-DEPTH: 8: 1 fa:1
+; CHECK-MAX-CTX-DEPTH: !CFGChecksum: 563070469352221
+
+
+; CHECK-MAX-UNSYM-CTX-DEPTH: [0x7ab @ 0x7ab]
+; CHECK-MAX-UNSYM-CTX-DEPTH: 3
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7a0-7a7:1
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7a0-7ab:3
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7b2-7b5:1
+; CHECK-MAX-UNSYM-CTX-DEPTH: 3
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7a7->7b2:1
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7ab->7a0:4
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7b5->7c0:1
+; CHECK-MAX-UNSYM-CTX-DEPTH: [0x7ab @ 0x7b5]
+; CHECK-MAX-UNSYM-CTX-DEPTH: 1
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7c0-7d4:1
+; CHECK-MAX-UNSYM-CTX-DEPTH: 1
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7d4->7c0:1
+; CHECK-MAX-UNSYM-CTX-DEPTH: [0x7b5 @ 0x7d4]
+; CHECK-MAX-UNSYM-CTX-DEPTH: 2
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7c0-7cd:1
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7db-7e0:1
+; CHECK-MAX-UNSYM-CTX-DEPTH: 2
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7cd->7db:1
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7e0->7a0:1
+; CHECK-MAX-UNSYM-CTX-DEPTH: [0x7b5 @ 0x7e0]
+; CHECK-MAX-UNSYM-CTX-DEPTH: 2
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7a0-7a7:1
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7b2-7b5:1
+; CHECK-MAX-UNSYM-CTX-DEPTH: 2
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7a7->7b2:1
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7b5->7c0:1
+; CHECK-MAX-UNSYM-CTX-DEPTH: [0x7d4 @ 0x7e0]
+; CHECK-MAX-UNSYM-CTX-DEPTH: 2
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7a0-7a7:1
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7b2-7b5:1
+; CHECK-MAX-UNSYM-CTX-DEPTH: 2
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7a7->7b2:1
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7b5->7c0:1
+; CHECK-MAX-UNSYM-CTX-DEPTH: [0x7e0 @ 0x7b5]
+; CHECK-MAX-UNSYM-CTX-DEPTH: 2
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7c0-7cd:2
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7db-7e0:1
+; CHECK-MAX-UNSYM-CTX-DEPTH: 2
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7cd->7db:2
+; CHECK-MAX-UNSYM-CTX-DEPTH: 7e0->7a0:1
+
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: [fb:5 @ fb:5 @ fb]:13:4
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 1: 4
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 2: 3
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 3: 1
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 4: 0
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 5: 4 fb:4
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 6: 1 fa:1
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: !CFGChecksum: 563022570642068
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: [fa:7 @ fb:6 @ fa]:6:2
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 1: 2
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 3: 2
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 4: 1
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 5: 0
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 6: 0
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 7: 1 fb:1
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 8: 0
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: !CFGChecksum: 563070469352221
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: [fb:5 @ fb:6 @ fa]:4:1
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 1: 1
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 3: 1
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 4: 0
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 5: 1
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 6: 0
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 7: 0
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 8: 1 fa:1
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: !CFGChecksum: 563070469352221
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: [fb:6 @ fa:8 @ fa]:4:1
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 1: 1
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 3: 1
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 4: 1
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 5: 0
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 6: 0
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 7: 1 fb:1
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 8: 0
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: !CFGChecksum: 563070469352221
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: [fa:8 @ fa:7 @ fb]:3:1
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 1: 1
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 2: 0
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 3: 1
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 4: 0
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 5: 0
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 6: 1 fa:1
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: !CFGChecksum: 563022570642068
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: [fb:6 @ fa:7 @ fb]:3:1
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 1: 1
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 2: 0
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 3: 1
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 4: 0
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 5: 0
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: 6: 1 fa:1
+; CHECK-MAX-UNSYM-CTX-DEPTH-PROF: !CFGChecksum: 563022570642068
diff --git a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test
index c673028584c0dc..b8e3e248e77935 100644
--- a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test
+++ b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test
@@ -9,9 +9,6 @@
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe-nommap.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --profile-summary-hot-count=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0
; RUN: FileCheck %s --input-file %t
-; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0 --profile-summary-hot-count=0 --csprof-max-context-depth=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0
-; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-MAX-CTX-DEPTH
-
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa]:4:1
; CHECK-UNCOMPRESS: 1: 1
@@ -68,23 +65,6 @@
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:1:0
; CHECK-UNCOMPRESS: 5: 1 fb:1
; CHECK-UNCOMPRESS: !CFGChecksum: 563022570642068
-; CHECK-MAX-CTX-DEPTH: [fb]:19:6
-; CHECK-MAX-CTX-DEPTH: 1: 6
-; CHECK-MAX-CTX-DEPTH: 2: 3
-; CHECK-MAX-CTX-DEPTH: 3: 3
-; CHECK-MAX-CTX-DEPTH: 4: 0
-; CHECK-MAX-CTX-DEPTH: 5: 4 fb:4
-; CHECK-MAX-CTX-DEPTH: 6: 3 fa:3
-; CHECK-MAX-CTX-DEPTH: !CFGChecksum: 563022570642068
-; CHECK-MAX-CTX-DEPTH: [fa]:14:4
-; CHECK-MAX-CTX-DEPTH: 1: 4
-; CHECK-MAX-CTX-DEPTH: 3: 4
-; CHECK-MAX-CTX-DEPTH: 4: 2
-; CHECK-MAX-CTX-DEPTH: 5: 1
-; CHECK-MAX-CTX-DEPTH: 6: 0
-; CHECK-MAX-CTX-DEPTH: 7: 2 fb:2
-; CHECK-MAX-CTX-DEPTH: 8: 1 fa:1
-; CHECK-MAX-CTX-DEPTH: !CFGChecksum: 563070469352221
; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb]:13:4
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index 111c546f5329fb..9f94df6a7d9269 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -42,6 +42,11 @@ static cl::opt<bool>
cl::opt<bool> ShowDetailedWarning("show-detailed-warning",
cl::desc("Show detailed warning message."));
+static cl::opt<int> CSProfMaxUnsymbolizedCtxDepth(
+ "csprof-max-unsymbolized-context-depth", cl::init(-1),
+ cl::desc("Keep the last K contexts while merging unsymbolized profile. -1 "
+ "means no depth limit."));
+
extern cl::opt<std::string> PerfTraceFilename;
extern cl::opt<bool> ShowDisassemblyOnly;
extern cl::opt<bool> ShowSourceLocations;
@@ -172,7 +177,13 @@ std::shared_ptr<AddrBasedCtxKey> AddressStack::getContextKey() {
std::shared_ptr<AddrBasedCtxKey> KeyStr = std::make_shared<AddrBasedCtxKey>();
KeyStr->Context = Stack;
CSProfileGenerator::compressRecursionContext<uint64_t>(KeyStr->Context);
- CSProfileGenerator::trimContext<uint64_t>(KeyStr->Context);
+ int Depth = CSProfileGenerator::MaxContextDepth != -1
+ ? CSProfileGenerator::MaxContextDepth
+ : KeyStr->Context.size();
+ Depth = CSProfMaxUnsymbolizedCtxDepth != -1
+ ? std::min(static_cast<int>(CSProfMaxUnsymbolizedCtxDepth), Depth)
+ : Depth;
+ CSProfileGenerator::trimContext<uint64_t>(KeyStr->Context, Depth);
return KeyStr;
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/121531
More information about the llvm-commits
mailing list