[llvm] [BOLT][merge-fdata] Fix basic sample profile aggregation without LBR info (PR #118481)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 3 04:31:44 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-bolt
Author: Tibor Dusnoki (tdusnoki)
<details>
<summary>Changes</summary>
When a basic sample profile is gathered without LBR info, the generated profile contains a "no-lbr" tag in the first line of the fdata file. This PR fixes merge-fdata to recognize and save this tag to the output file.
---
Full diff: https://github.com/llvm/llvm-project/pull/118481.diff
5 Files Affected:
- (added) bolt/test/X86/Inputs/no-lbr_profile_1.fdata (+305)
- (added) bolt/test/X86/Inputs/no-lbr_profile_2.fdata (+290)
- (added) bolt/test/X86/merge-fdata-lbr-mode.test (+6)
- (added) bolt/test/X86/merge-fdata-no-lbr-mode.test (+8)
- (modified) bolt/tools/merge-fdata/merge-fdata.cpp (+7)
``````````diff
diff --git a/bolt/test/X86/Inputs/no-lbr_profile_1.fdata b/bolt/test/X86/Inputs/no-lbr_profile_1.fdata
new file mode 100644
index 00000000000000..34035b12c5bf0f
--- /dev/null
+++ b/bolt/test/X86/Inputs/no-lbr_profile_1.fdata
@@ -0,0 +1,305 @@
+no_lbr cycles:HG:
+1 __aarch64_cas4_acq/1 c 2
+1 __aarch64_cas4_acq/1 0 20
+1 __aarch64_cas4_acq/1 14 954
+1 __aarch64_swp4_rel/1 14 657
+1 __ieee754_fmod 54 1
+1 __ieee754_fmod 14 1
+1 __ieee754_fmod c8 59
+1 __ieee754_fmod 74 2
+1 __ieee754_fmod 8c 119
+1 __ieee754_fmod b4 55
+1 __ieee754_fmod 78 95
+1 __ieee754_fmod a0 93
+1 __ieee754_fmod 94 33
+1 __ieee754_fmod c4 96
+1 __ieee754_fmod 84 31
+1 __ieee754_fmod 80 9
+1 __ieee754_fmod c0 38
+1 __ieee754_fmod 1c 12
+1 __ieee754_fmod 88 111
+1 __ieee754_fmod 18 14
+1 __ieee754_fmod 98 95
+1 __ieee754_fmod ac 13
+1 __ieee754_fmod 58 7
+1 __ieee754_fmod 3c 14
+1 __ieee754_fmod b0 39
+1 __ieee754_fmod 7c 32
+1 __ieee754_fmod 20 1
+1 __ieee754_fmod 5c 16
+1 __ieee754_fmod bc 3
+1 __ieee754_fmod 90 127
+1 __ieee754_fmod 8 2
+1 __ieee754_fmod e0 2
+1 __ieee754_fmod 28 1
+1 __ieee754_fmod 38 5
+1 __ieee754_fmod 68 2
+1 __random/1 34 14
+1 __random/1 18 19
+1 __random/1 14 17
+1 __random/1 84 49
+1 __random/1 c 1
+1 __random/1 64 85
+1 __random/1 3c 60
+1 __random/1 78 7
+1 __random/1 58 33
+1 __random/1 2c 2
+1 __random_r/1 58 1
+1 __random_r/1 2c 31
+1 __random_r/1 c 39
+1 __random_r/1 48 38
+1 cos 300 1
+1 cos 438 1
+1 cos 32c 3
+1 cos 370 3
+1 cos 5bc 15
+1 cos 14c 2
+1 cos 414 5
+1 cos 324 1
+1 cos 2bc 28
+1 cos 44c 6
+1 cos 48 1
+1 cos 238 17
+1 cos 38 18
+1 cos 50 2
+1 cos 440 49
+1 cos 30c 5
+1 cos 46c 7
+1 cos 68 3
+1 cos 5c0 11
+1 cos 2c4 40
+1 cos 18 17
+1 cos 3d8 39
+1 cos 5c4 12
+1 cos 350 6
+1 cos 5b8 59
+1 cos 6c 4
+1 cos 2ec 16
+1 cos 64 25
+1 cos 468 2
+1 cos 70 90
+1 cos 248 17
+1 cos 2d8 12
+1 cos 26c 1
+1 cos 344 5
+1 cos 2c0 45
+1 cos 58 7
+1 cos 328 4
+1 cos 308 7
+1 cos 40c 10
+1 cos 3f8 27
+1 cos 5d0 9
+1 cos 3e0 55
+1 cos 3e4 16
+1 cos 274 5
+1 cos 2cc 70
+1 cos 264 3
+1 cos 28c 6
+1 cos 5e8 4
+1 cos 444 7
+1 cos 4c 3
+1 cos 3dc 52
+1 cos 304 1
+1 cos 288 16
+1 cos 608 2
+1 cos 6e0 14
+1 cos 2a8 14
+1 cos 2d4 7
+1 cos 340 32
+1 cos 5dc 2
+1 cos 138 18
+1 cos 268 11
+1 cos 2e8 3
+1 cos 3f4 2
+1 cos 400 1
+1 cos 2b4 4
+1 cos 294 2
+1 cos 420 4
+1 cos 2e4 1
+1 cos 418 2
+1 cos 36c 1
+1 exp 54 2
+1 exp 3c 1
+1 exp 70 10
+1 exp 50 228
+1 exp 48 1
+1 exp 14 164
+1 exp 34 12
+1 exp 0 20
+1 exp 74 4
+1 fmod 8 11
+1 fmod 28 19
+1 fmod 14 3
+1 fmod 4 11
+1 fmod 24 9
+1 fmod 0 2
+1 fmod 20 1
+1 log a8 2
+1 log 94 12
+1 log 20 78
+1 log b4 14
+1 log 68 79
+1 log 10 5
+1 log 30 25
+1 log 0 10
+1 log 50 14
+1 log 88 6
+1 log 74 18
+1 main 1c8 1
+1 main 70 1
+1 main e0 1
+1 main 48 3
+1 main 110 3
+1 main 12c 19
+1 main cc 21
+1 main d8 15
+1 main 44 9
+1 main d4 5
+1 main 114 2
+1 main 7c 18
+1 main fc 160
+1 main f8 27
+1 main 11c 21
+1 main bc 9
+1 main b8 26
+1 main a4 4
+1 main 94 15
+1 main 60 19
+1 main b0 2
+1 rand 10 1
+1 rand 4 14
+1 rand 8 13
+1 sin 2a0 1
+1 sin 25c 1
+1 sin 260 1
+1 sin a0 1
+1 sin 608 1
+1 sin 1c4 1
+1 sin 74 2
+1 sin 4a8 2
+1 sin 228 1
+1 sin 4b8 27
+1 sin 45c 1
+1 sin 58 6
+1 sin 274 21
+1 sin 220 15
+1 sin 0 9
+1 sin 628 1
+1 sin 284 6
+1 sin 2a8 3
+1 sin 208 9
+1 sin 4bc 4
+1 sin 700 7
+1 sin 4ac 6
+1 sin 70 29
+1 sin 280 2
+1 sin 4e8 1
+1 sin 240 1
+1 sin 5f0 6
+1 sin 5e4 16
+1 sin 290 1
+1 sin 204 3
+1 sin 270 2
+1 sin 4c8 3
+1 sin 458 5
+1 sin 60 4
+1 sin 1c 17
+1 sin 170 17
+1 sin 5e0 2
+1 sin 464 1
+1 sin 4d0 2
+1 sin 234 8
+1 sin 484 1
+1 sin 4e4 5
+1 sin 4c4 9
+1 sin 468 1
+1 sin 64 13
+1 sin 20c 4
+1 sin 200 10
+1 sin 184 1
+1 sin 1f0 10
+1 sin 5dc 6
+1 sin 1a4 2
+1 sin 1d0 17
+1 sin 23c 4
+1 sin 48c 7
+1 sin 3c 17
+1 sin 488 3
+1 sin 164 5
+1 sin 30 1
+1 sin 1e4 5
+1 sin 1b0 16
+1 sin 450 4
+1 sin 190 23
+1 sin 470 19
+1 sin 230 1
+1 sin 4b0 3
+1 sin 10 4
+1 sin 78 3
+1 sin 5c 5
+1 sin 278 6
+1 sin 248 2
+1 sin 238 2
+1 sin 5fc 2
+1 sin 490 1
+1 sin 268 3
+1 sin 2a4 4
+1 sin 498 1
+1 sqrt 1c 66
+1 tan 374 1
+1 tan 254 1
+1 tan 344 1
+1 tan 230 1
+1 tan 1e0 1
+1 tan 1c4 1
+1 tan 510 1
+1 tan 5c4 2
+1 tan 2b8 1
+1 tan 140 2
+1 tan 6c 1
+1 tan c 16
+1 tan 64 36
+1 tan 3b4 12
+1 tan 2c 16
+1 tan 2ac 16
+1 tan 5f8 24
+1 tan 358 32
+1 tan 4c 19
+1 tan 8c 22
+1 tan 134 1
+1 tan 26c 3
+1 tan 124 20
+1 tan 5bc 4
+1 tan 234 1
+1 tan 78 25
+1 tan 1c0 6
+1 tan 250 1
+1 tan 560 2
+1 tan 30c 13
+1 tan 390 157
+1 tan 378 20
+1 tan 33c 2
+1 tan 51c 319
+1 tan 2ec 22
+1 tan 5a8 8
+1 tan 98 22
+1 tan 144 18
+1 tan 548 9
+1 tan 128 16
+1 tan 130 3
+1 tan 5b8 3
+1 tan 394 25
+1 tan 340 6
+1 tan 148 7
+1 tan 1d8 2
+1 tan 2cc 25
+1 tan 4f0 1
+1 tan 18 1
+1 tan 5c0 5
+1 tan 544 2
+1 tan 540 6
+1 tan 550 3
+1 tan 214 5
+1 tan 338 2
+1 tan 354 3
+1 tan 53c 8
diff --git a/bolt/test/X86/Inputs/no-lbr_profile_2.fdata b/bolt/test/X86/Inputs/no-lbr_profile_2.fdata
new file mode 100644
index 00000000000000..f057d7a5021034
--- /dev/null
+++ b/bolt/test/X86/Inputs/no-lbr_profile_2.fdata
@@ -0,0 +1,290 @@
+no_lbr cycles:HG:
+1 __aarch64_cas4_acq/1 14 981
+1 __aarch64_cas4_acq/1 0 23
+1 __aarch64_swp4_rel/1 c 2
+1 __aarch64_swp4_rel/1 14 663
+1 __ieee754_fmod 80 17
+1 __ieee754_fmod bc 4
+1 __ieee754_fmod b4 56
+1 __ieee754_fmod 58 12
+1 __ieee754_fmod b0 49
+1 __ieee754_fmod c0 44
+1 __ieee754_fmod 18 12
+1 __ieee754_fmod 88 98
+1 __ieee754_fmod 38 12
+1 __ieee754_fmod c4 73
+1 __ieee754_fmod 1c 11
+1 __ieee754_fmod 98 85
+1 __ieee754_fmod a0 85
+1 __ieee754_fmod c8 65
+1 __ieee754_fmod ac 15
+1 __ieee754_fmod 8c 113
+1 __ieee754_fmod 8 1
+1 __ieee754_fmod 84 35
+1 __ieee754_fmod 78 103
+1 __ieee754_fmod 5c 8
+1 __ieee754_fmod 3c 8
+1 __ieee754_fmod 68 1
+1 __ieee754_fmod e0 4
+1 __ieee754_fmod 7c 38
+1 __ieee754_fmod 14 1
+1 __ieee754_fmod 90 119
+1 __ieee754_fmod 94 27
+1 __random/1 30 1
+1 __random/1 2c 1
+1 __random/1 14 17
+1 __random/1 c 1
+1 __random/1 84 45
+1 __random/1 78 5
+1 __random/1 64 80
+1 __random/1 58 35
+1 __random/1 34 18
+1 __random/1 3c 61
+1 __random/1 18 25
+1 __random_r/1 58 3
+1 __random_r/1 c 39
+1 __random_r/1 2c 35
+1 __random_r/1 48 41
+1 cos 448 1
+1 cos 3f4 2
+1 cos 2ac 3
+1 cos 2c8 1
+1 cos 32c 2
+1 cos 2cc 66
+1 cos 3d8 42
+1 cos 2bc 29
+1 cos 138 15
+1 cos 414 7
+1 cos 344 6
+1 cos 370 10
+1 cos 4c 12
+1 cos 2e4 2
+1 cos 350 4
+1 cos 38 13
+1 cos 248 15
+1 cos 264 5
+1 cos 5b8 52
+1 cos 64 18
+1 cos 328 3
+1 cos 3dc 44
+1 cos 50 6
+1 cos 184 1
+1 cos 58 7
+1 cos 2ec 21
+1 cos 300 5
+1 cos 70 88
+1 cos 2d8 13
+1 cos 6dc 3
+1 cos 6e0 11
+1 cos 27c 2
+1 cos 2c0 50
+1 cos 3e0 41
+1 cos 68 1
+1 cos 46c 8
+1 cos 2d4 5
+1 cos 340 24
+1 cos 3f8 29
+1 cos 2a8 21
+1 cos 48 3
+1 cos 44c 2
+1 cos 440 51
+1 cos 2c4 41
+1 cos 238 16
+1 cos 5d0 11
+1 cos 3e4 19
+1 cos 324 2
+1 cos 2b8 1
+1 cos 18 18
+1 cos 5bc 18
+1 cos 5c4 13
+1 cos 6c 4
+1 cos 274 1
+1 cos 268 18
+1 cos 54 5
+1 cos 294 3
+1 cos 288 5
+1 cos 420 7
+1 cos 26c 4
+1 cos 308 10
+1 cos 314 1
+1 cos 5c0 18
+1 cos 444 9
+1 cos 2b4 2
+1 cos 5e8 2
+1 cos 28c 1
+1 cos 40c 7
+1 cos 84 1
+1 cos 418 2
+1 cos 304 1
+1 cos 5dc 3
+1 exp 2c 1
+1 exp 24 2
+1 exp 54 3
+1 exp 50 201
+1 exp 34 26
+1 exp 14 140
+1 exp 4c 1
+1 exp 70 14
+1 exp 0 14
+1 exp 10 1
+1 exp 74 1
+1 fmod 18 4
+1 fmod 24 12
+1 fmod 28 33
+1 fmod 4 17
+1 fmod 8 13
+1 fmod 20 1
+1 fmod 14 1
+1 log 68 72
+1 log 94 12
+1 log 74 18
+1 log a8 5
+1 log 50 15
+1 log b4 10
+1 log 0 12
+1 log 20 72
+1 log 30 28
+1 log 88 6
+1 main 44 16
+1 main bc 12
+1 main cc 22
+1 main 12c 12
+1 main 70 1
+1 main f8 18
+1 main 48 3
+1 main 114 3
+1 main e0 4
+1 main fc 143
+1 main 60 22
+1 main d8 6
+1 main d0 1
+1 main b8 25
+1 main dc 1
+1 main 94 20
+1 main 11c 16
+1 main 110 1
+1 main b0 4
+1 main 7c 13
+1 main 130 4
+1 main a4 1
+1 rand 4 24
+1 rand 8 13
+1 rand 10 3
+1 rand 0 1
+1 sin 4a8 1
+1 sin 260 1
+1 sin 490 1
+1 sin 2a0 3
+1 sin 2a8 1
+1 sin 1c4 3
+1 sin 228 1
+1 sin 248 1
+1 sin 234 6
+1 sin 628 1
+1 sin 700 6
+1 sin 0 25
+1 sin 1c 13
+1 sin 2a4 5
+1 sin 4d0 1
+1 sin 290 1
+1 sin 3c 18
+1 sin 208 4
+1 sin 1f0 10
+1 sin 268 2
+1 sin 5e4 18
+1 sin 25c 1
+1 sin 60 2
+1 sin 5dc 1
+1 sin 274 24
+1 sin 1e4 2
+1 sin 450 6
+1 sin 1d0 24
+1 sin 23c 4
+1 sin 258 3
+1 sin 64 12
+1 sin 468 5
+1 sin 1a4 3
+1 sin 78 6
+1 sin 220 14
+1 sin 4b8 20
+1 sin 458 5
+1 sin 470 20
+1 sin 170 13
+1 sin 164 4
+1 sin 5f0 7
+1 sin 484 8
+1 sin 4b0 2
+1 sin 190 27
+1 sin 184 3
+1 sin 4c4 3
+1 sin 70 14
+1 sin 284 3
+1 sin 58 9
+1 sin 288 1
+1 sin 10 4
+1 sin 210 5
+1 sin 278 6
+1 sin 4bc 4
+1 sin 4ac 2
+1 sin 608 2
+1 sin 498 2
+1 sin 1b0 23
+1 sin 4e4 3
+1 sin 280 1
+1 sin 20c 12
+1 sin 45c 7
+1 sin 48c 5
+1 sin 218 2
+1 sin 5c 5
+1 sin 200 5
+1 sqrt 1c 62
+1 tan 550 1
+1 tan 588 2
+1 tan 120 1
+1 tan 33c 2
+1 tan 5c4 2
+1 tan 540 4
+1 tan 214 2
+1 tan 8c 30
+1 tan 374 4
+1 tan c 15
+1 tan 2ec 17
+1 tan 408 1
+1 tan 64 49
+1 tan 2cc 17
+1 tan 26c 4
+1 tan 140 2
+1 tan 548 6
+1 tan 144 14
+1 tan 358 31
+1 tan 340 4
+1 tan 78 28
+1 tan 148 7
+1 tan 560 2
+1 tan 128 16
+1 tan 270 1
+1 tan 5bc 1
+1 tan 394 27
+1 tan 30c 19
+1 tan 378 20
+1 tan 2ac 26
+1 tan 3b4 17
+1 tan 5b8 1
+1 tan 51c 353
+1 tan 35c 2
+1 tan 98 22
+1 tan 5f8 22
+1 tan 2c 18
+1 tan 5a8 6
+1 tan 5c0 4
+1 tan 124 14
+1 tan 1e8 1
+1 tan 4c 21
+1 tan 1c4 6
+1 tan 510 2
+1 tan 1c0 4
+1 tan 338 1
+1 tan 390 169
+1 tan 6c 2
+1 tan 3b0 3
+1 tan 53c 10
diff --git a/bolt/test/X86/merge-fdata-lbr-mode.test b/bolt/test/X86/merge-fdata-lbr-mode.test
new file mode 100644
index 00000000000000..d32c9caec90e6a
--- /dev/null
+++ b/bolt/test/X86/merge-fdata-lbr-mode.test
@@ -0,0 +1,6 @@
+## Check that merge-fdata tool doesn't falsely print no_lbr when not in no-lbr mode
+
+RUN: merge-fdata %S/Inputs/blarge.fdata %S/Inputs/blarge.fdata \
+RUN: | FileCheck %s --check-prefix=CHECK-FDATA
+
+CHECK-FDATA-NOT: no_lbr
diff --git a/bolt/test/X86/merge-fdata-no-lbr-mode.test b/bolt/test/X86/merge-fdata-no-lbr-mode.test
new file mode 100644
index 00000000000000..d63f98e096927d
--- /dev/null
+++ b/bolt/test/X86/merge-fdata-no-lbr-mode.test
@@ -0,0 +1,8 @@
+## Check that merge-fdata tool correctly processes fdata files with header
+## string produced by no-lbr mode (no_lbr)
+RUN: merge-fdata %S/Inputs/no-lbr_profile_1.fdata \
+RUN: %S/Inputs/no-lbr_profile_2.fdata \
+RUN: | FileCheck %s --check-prefix=CHECK-FDATA
+
+CHECK-FDATA: no_lbr
+CHECK-FDATA: 1 tan 234 1
diff --git a/bolt/tools/merge-fdata/merge-fdata.cpp b/bolt/tools/merge-fdata/merge-fdata.cpp
index 89ca46c1c0a8fa..4b7428020851ec 100644
--- a/bolt/tools/merge-fdata/merge-fdata.cpp
+++ b/bolt/tools/merge-fdata/merge-fdata.cpp
@@ -265,6 +265,7 @@ bool isYAML(const StringRef Filename) {
void mergeLegacyProfiles(const SmallVectorImpl<std::string> &Filenames) {
errs() << "Using legacy profile format.\n";
std::optional<bool> BoltedCollection;
+ std::optional<bool> NoLBR;
std::mutex BoltedCollectionMutex;
typedef StringMap<uint64_t> ProfileTy;
@@ -304,6 +305,10 @@ void mergeLegacyProfiles(const SmallVectorImpl<std::string> &Filenames) {
SmallVector<StringRef> Lines;
SplitString(Buf, Lines, "\n");
for (StringRef Line : Lines) {
+ if (Line.contains("no_lbr")) {
+ NoLBR = true;
+ continue;
+ }
size_t Pos = Line.rfind(" ");
if (Pos == StringRef::npos)
report_error(Filename, "Malformed / corrupted profile");
@@ -336,6 +341,8 @@ void mergeLegacyProfiles(const SmallVectorImpl<std::string> &Filenames) {
if (BoltedCollection.value_or(false))
output() << "boltedcollection\n";
+ if (NoLBR.value_or(true))
+ output() << "no_lbr\n";
for (const auto &[Key, Value] : MergedProfile)
output() << Key << " " << Value << "\n";
``````````
</details>
https://github.com/llvm/llvm-project/pull/118481
More information about the llvm-commits
mailing list