[llvm] b1a45c6 - [llvm-profgen] Ignore branch count against outline function
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 7 14:04:03 PDT 2021
Author: wlei
Date: 2021-10-07T14:03:34-07:00
New Revision: b1a45c62f03ecbeb4544b0c65a01ee4586235a61
URL: https://github.com/llvm/llvm-project/commit/b1a45c62f03ecbeb4544b0c65a01ee4586235a61
DIFF: https://github.com/llvm/llvm-project/commit/b1a45c62f03ecbeb4544b0c65a01ee4586235a61.diff
LOG: [llvm-profgen] Ignore branch count against outline function
Some transformations, such as hot-cold splitting or coroutine splitting, can outline part of a function's ranges into a separate function. Since the sample loader runs at an early stage of the backend and no split has happened at that time, the compiler can't recognize those outlined functions, so in llvm-profgen we should attribute their samples to the original function. This is already done for the body range samples, since we use the symbols from DWARF, which is created before the split.
But for branch samples, the call from the main function to its outlined function is not actually a call to the original function, so we shouldn't add head/callsite samples for it. So instead of DWARF symbols, we use the symbols from the symbol table and ignore functions with special suffixes (like `.cold`, `.resume`) when accumulating the callsite/head samples.
Reviewed By: hoy, wenlei
Differential Revision: https://reviews.llvm.org/D110864
Added:
llvm/test/tools/llvm-profgen/Inputs/coroutine.perfbin
llvm/test/tools/llvm-profgen/Inputs/coroutine.perfscript
llvm/test/tools/llvm-profgen/Inputs/func-split.perfbin
llvm/test/tools/llvm-profgen/Inputs/func-split.perfscript
llvm/test/tools/llvm-profgen/coroutine.test
llvm/test/tools/llvm-profgen/func-split.test
Modified:
llvm/tools/llvm-profgen/ProfileGenerator.cpp
llvm/tools/llvm-profgen/ProfileGenerator.h
Removed:
################################################################################
diff --git a/llvm/test/tools/llvm-profgen/Inputs/coroutine.perfbin b/llvm/test/tools/llvm-profgen/Inputs/coroutine.perfbin
new file mode 100755
index 0000000000000..927f8794d8040
Binary files /dev/null and b/llvm/test/tools/llvm-profgen/Inputs/coroutine.perfbin differ
diff --git a/llvm/test/tools/llvm-profgen/Inputs/coroutine.perfscript b/llvm/test/tools/llvm-profgen/Inputs/coroutine.perfscript
new file mode 100644
index 0000000000000..6245c122cef85
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/Inputs/coroutine.perfscript
@@ -0,0 +1,2 @@
+ 401d4a 0x401d4a/0x402b60/P/-/-/14 0x401d04/0x401d43/M/-/-/1 0x401ceb/0x401cf0/P/-/-/1 0x401f74/0x401ce3/P/-/-/1 0x401f6f/0x401f74/P/-/-/1 0x401aff/0x401f60/P/-/-/10 0x402df6/0x401a50/P/-/-/2 0x402c63/0x402de0/P/-/-/1 0x402c51/0x402c5c/P/-/-/1 0x402dba/0x402c4f/P/-/-/3 0x402c4a/0x402da0/P/-/-/2 0x401999/0x402c30/P/-/-/1 0x4019d5/0x401995/P/-/-/1 0x4019c2/0x4019c7/P/-/-/1 0x402cbc/0x4019bf/P/-/-/2 0x402e3e/0x402cb5/P/-/-/2 0x402cb0/0x402e20/P/-/-/2 0x4019ba/0x402ca0/P/-/-/1 0x4019ab/0x4019b6/P/-/-/3 0x4019a1/0x4019a6/P/-/-/1 0x402c95/0x40199e/P/-/-/5 0x402c79/0x402c88/P/-/-/3 0x402e1d/0x402c74/P/-/-/14 0x402c6f/0x402e00/P/-/-/1 0x402dfd/0x402c68/P/-/-/1 0x401f5f/0x402df8/P/-/-/1 0x401f4f/0x401f54/P/-/-/1 0x401d17/0x401f4f/P/-/-/1 0x401d0a/0x401d0f/P/-/-/3 0x401cde/0x401cf0/P/-/-/1 0x402b2d/0x401cd6/P/-/-/2 0x401cd1/0x402b20/P/-/-/9
+ 401c7b 0x401c7b/0x401c2d/P/-/-/9 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/9 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/9 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/15 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/9 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/15 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/13 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/9 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/9 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/13 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/10 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/9 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/9 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/11 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/9 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/13 0x401c34/0x401c3f/P/-/-/1
diff --git a/llvm/test/tools/llvm-profgen/Inputs/func-split.perfbin b/llvm/test/tools/llvm-profgen/Inputs/func-split.perfbin
new file mode 100755
index 0000000000000..cc01a7c422124
Binary files /dev/null and b/llvm/test/tools/llvm-profgen/Inputs/func-split.perfbin differ
diff --git a/llvm/test/tools/llvm-profgen/Inputs/func-split.perfscript b/llvm/test/tools/llvm-profgen/Inputs/func-split.perfscript
new file mode 100644
index 0000000000000..f13b780fd79f7
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/Inputs/func-split.perfscript
@@ -0,0 +1,3 @@
+ 4004f0
+ 5541f689495641d7
+ 0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/1 0x400633/0x400516/P/-/-/1 0x4004da/0x400631/P/-/-/2 0x40062c/0x4004c0/P/-/-/1 0x40050d/0x40062a/P/-/-/3 0x40051f/0x4004f0/P/-/-/5 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/5
diff --git a/llvm/test/tools/llvm-profgen/coroutine.test b/llvm/test/tools/llvm-profgen/coroutine.test
new file mode 100644
index 0000000000000..981237c6e2447
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/coroutine.test
@@ -0,0 +1,83 @@
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/coroutine.perfscript --binary=%S/Inputs/coroutine.perfbin --output=%t
+; RUN: FileCheck %s --input-file %t --check-prefix=CHECK
+
+; Check that the head sample count for ticker is 0.
+; CHECK: _Z6tickeri:353:0
+; CHECK-NOT: _Z6tickeri.resume
+
+
+/*
+ * Inputs/coroutine.perfbin is generated by compiling the following source code:
+ * clang++ coroutine.cpp -std=c++2a -g2 -o coroutine
+ */
+
+#include <cstdint>
+#include <cstdlib>
+#include <ctime>
+#include <experimental/coroutine>
+#include <iostream>
+
+struct task {
+ struct promise_type {
+ task get_return_object() { return {}; }
+ std::experimental::suspend_never initial_suspend() { return {}; }
+ std::experimental::suspend_never final_suspend() noexcept { return {}; }
+ void return_void() {}
+ void unhandled_exception() {}
+ };
+};
+
+template <typename T>
+struct generator {
+ struct promise_type;
+ using handle = std::experimental::coroutine_handle<promise_type>;
+ struct promise_type {
+ int current_value;
+ static auto get_return_object_on_allocation_failure() { return generator{nullptr}; }
+ auto get_return_object() { return generator{handle::from_promise(*this)}; }
+ auto initial_suspend() { return std::experimental::suspend_always{}; }
+ auto final_suspend() { return std::experimental::suspend_always{}; }
+ void unhandled_exception() { std::terminate(); }
+ void return_void() {}
+ auto yield_value(int value) {
+ current_value = value;
+ return std::experimental::suspend_always{};
+ }
+ };
+ bool move_next() { return coro ? (coro.resume(), !coro.done()) : false; }
+ int current_value() { return coro.promise().current_value; }
+ generator(generator const &) = delete;
+ generator(generator &&rhs) : coro(rhs.coro) { rhs.coro = nullptr; }
+ ~generator() {
+ if (coro)
+ coro.destroy();
+ }
+
+private:
+ generator(handle h) : coro(h) {}
+ handle coro;
+};
+
+generator<int> ticker(int count) {
+ for (int i = 0; i < count; ++i) {
+ srand(time(NULL));
+ uint32_t a = rand() % 10 + 1;
+ uint32_t b = rand() % 10 + 1;
+ uint64_t c = 0;
+ for (int i = 0; i < 1500; ++i) {
+ c = ((uint64_t)a) + b;
+ a = b;
+ b = c % 2147483648ULL;
+ }
+ co_yield a;
+ }
+}
+
+int main() {
+ auto g = ticker(500000);
+ uint64_t ans = 0;
+ while (g.move_next()) {
+ ans += g.current_value();
+ }
+ std::cout << ans << "\n";
+}
diff --git a/llvm/test/tools/llvm-profgen/func-split.test b/llvm/test/tools/llvm-profgen/func-split.test
new file mode 100644
index 0000000000000..49c77a24ecb65
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/func-split.test
@@ -0,0 +1,69 @@
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/func-split.perfscript --binary=%S/Inputs/func-split.perfbin --output=%t
+; RUN: FileCheck %s --input-file %t --check-prefix=CHECK
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/func-split.perfscript --binary=%S/Inputs/func-split.perfbin --output=%t --ignore-stack-samples
+; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-STRIP-CTX
+
+;CHECK: [foo]:408:0
+;CHECK: 2.1: 27
+;CHECK: 3: 27
+;CHECK: 3.1: 2 bar:2
+;CHECK: 3.2: 26
+;CHECK: [foo:3.1 @ bar]:8:0
+;CHECK: 1: 1
+;CHECK: 5: 1
+;CHECK: [bar]:0:1
+
+;CHECK-NOT: foo.cold
+
+;CHECK-STRIP-CTX: foo:408:0
+;CHECK-STRIP-CTX: 0: 0
+;CHECK-STRIP-CTX: 2.1: 27
+;CHECK-STRIP-CTX: 3: 27
+;CHECK-STRIP-CTX: 3.1: 1 bar:1
+;CHECK-STRIP-CTX: 3.2: 26
+;CHECK-STRIP-CTX: 4: 0
+;CHECK-STRIP-CTX: bar:8:1
+;CHECK-STRIP-CTX: 1: 1
+;CHECK-STRIP-CTX: 5: 1
+
+;CHECK-STRIP-CTX-NOT: foo.cold
+
+
+; clang -g -O3 -fdebug-info-for-profiling func-split.c -mllvm -mfs-count-threshold=0
+; -fprofile-sample-use=profile.txt -fno-inline -mllvm --enable-split-machine-functions=1
+
+#include <stdio.h>
+
+int bar(int x, int y) {
+ if (x % 3) {
+ return x - y;
+ }
+ return x + y;
+}
+
+void foo() {
+ int s, i = 0;
+ while (i++ < 4000 * 4000)
+ if (i % 91 == 0) s = bar(i, s); else s += 30;
+ printf("sum is %d\n", s);
+}
+
+int main() {
+ foo();
+ return 0;
+}
+
+; profile.txt:
+
+foo:106269:0
+ 2.1: 2268
+ 2.2: 2217
+ 3: 2268
+ 3.1: 1 bar:1
+ 3.2: 2192
+bar:1032:1
+ 0: 24
+ 1: 24
+ 2: 16
+ 4: 8
+ 5: 24
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index b5fff0bbf66b4..5d1cde308e8d6 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -397,6 +397,24 @@ void ProfileGenerator::populateBodySamplesForAllFunctions(
}
}
+static bool isOutlinedFunction(StringRef CalleeName) {
+ // Check whether it's from hot-cold func split or coro split.
+ return CalleeName.find(".resume") != StringRef::npos ||
+ CalleeName.find(".cold") != StringRef::npos;
+}
+
+StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) {
+ // Get the callee name by branch target if it's a call branch.
+ StringRef CalleeName = FunctionSamples::getCanonicalFnName(
+ Binary->getFuncFromStartOffset(TargetOffset));
+
+ // We won't accumulate sample count against outlined function.
+ if (CalleeName.size() == 0 || isOutlinedFunction(CalleeName))
+ return StringRef();
+
+ return CalleeName;
+}
+
void ProfileGenerator::populateBoundarySamplesForAllFunctions(
const BranchSample &BranchCounters) {
for (auto Entry : BranchCounters) {
@@ -405,9 +423,7 @@ void ProfileGenerator::populateBoundarySamplesForAllFunctions(
uint64_t Count = Entry.second;
assert(Count != 0 && "Unexpected zero weight branch");
- // Get the callee name by branch target if it's a call branch.
- StringRef CalleeName = FunctionSamples::getCanonicalFnName(
- Binary->getFuncFromStartOffset(TargetOffset));
+ StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
if (CalleeName.size() == 0)
continue;
// Record called target sample and its count.
@@ -551,9 +567,7 @@ void CSProfileGenerator::populateBoundarySamplesForFunction(
uint64_t Count = Entry.second;
assert(Count != 0 && "Unexpected zero weight branch");
- // Get the callee name by branch target if it's a call branch
- StringRef CalleeName = FunctionSamples::getCanonicalFnName(
- Binary->getFuncFromStartOffset(TargetOffset));
+ StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
if (CalleeName.size() == 0)
continue;
@@ -804,8 +818,7 @@ void CSProfileGenerator::populateBoundarySamplesWithProbes(
getFunctionProfileForLeafProbe(ContextStack, CallProbe);
FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
FunctionProfile.addTotalSamples(Count);
- StringRef CalleeName = FunctionSamples::getCanonicalFnName(
- Binary->getFuncFromStartOffset(TargetOffset));
+ StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
if (CalleeName.size() == 0)
continue;
FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index 4d334de0f41ea..9e5aecef88475 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -74,7 +74,7 @@ class ProfileGeneratorBase {
void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile,
const SampleContextFrame &LeafLoc,
uint64_t Count);
-
+ StringRef getCalleeNameForOffset(uint64_t TargetOffset);
// Used by SampleProfileWriter
SampleProfileMap ProfileMap;
More information about the llvm-commits
mailing list