[llvm] [BOLT] Drop converting return profile to call cont (PR #129477)
Amir Ayupov via llvm-commits
llvm-commits at lists.llvm.org
Tue May 6 22:21:19 PDT 2025
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/129477
>From 45a56be828e5549245417cd08464a6b23debb6b7 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Sun, 2 Mar 2025 21:14:52 -0800
Subject: [PATCH 1/3] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
=?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.4
---
bolt/include/bolt/Profile/DataAggregator.h | 4 ---
bolt/lib/Profile/DataAggregator.cpp | 37 +++-------------------
bolt/test/X86/callcont-fallthru.s | 24 --------------
3 files changed, 4 insertions(+), 61 deletions(-)
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 56eb463fc98fc..bfb9a97eb9dc7 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -197,10 +197,6 @@ class DataAggregator : public DataReader {
BoltAddressTranslation *BAT{nullptr};
- /// Whether pre-aggregated profile needs to convert branch profile into call
- /// to continuation fallthrough profile.
- bool NeedsConvertRetProfileToCallCont{false};
-
/// Update function execution profile with a recorded trace.
/// A trace is region of code executed between two LBR entries supplied in
/// execution order.
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index d20626bd5062f..8a0e54020f56b 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -720,23 +720,6 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
: isReturn(Func.disassembleInstructionAtOffset(Offset));
};
- // Returns whether \p Offset in \p Func may be a call continuation excluding
- // entry points and landing pads.
- auto checkCallCont = [&](const BinaryFunction &Func, const uint64_t Offset) {
- // No call continuation at a function start.
- if (!Offset)
- return false;
-
- // FIXME: support BAT case where the function might be in empty state
- // (split fragments declared non-simple).
- if (!Func.hasCFG())
- return false;
-
- // The offset should not be an entry point or a landing pad.
- const BinaryBasicBlock *ContBB = Func.getBasicBlockAtOffset(Offset);
- return ContBB && !ContBB->isEntryPoint() && !ContBB->isLandingPad();
- };
-
// Mutates \p Addr to an offset into the containing function, performing BAT
// offset translation and parent lookup.
//
@@ -749,35 +732,26 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
Addr -= Func->getAddress();
- bool IsRetOrCallCont =
- IsFrom ? checkReturn(*Func, Addr) : checkCallCont(*Func, Addr);
+ bool IsRet = IsFrom && checkReturn(*Func, Addr);
if (BAT)
Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);
BinaryFunction *ParentFunc = getBATParentFunction(*Func);
if (!ParentFunc)
- return std::pair{Func, IsRetOrCallCont};
+ return std::pair{Func, IsRet};
if (IsFrom)
NumColdSamples += Count;
- return std::pair{ParentFunc, IsRetOrCallCont};
+ return std::pair{ParentFunc, IsRet};
};
- uint64_t ToOrig = To;
auto [FromFunc, IsReturn] = handleAddress(From, /*IsFrom*/ true);
- auto [ToFunc, IsCallCont] = handleAddress(To, /*IsFrom*/ false);
+ auto [ToFunc, _] = handleAddress(To, /*IsFrom*/ false);
if (!FromFunc && !ToFunc)
return false;
- // Record call to continuation trace.
- if (NeedsConvertRetProfileToCallCont && FromFunc != ToFunc &&
- (IsReturn || IsCallCont)) {
- LBREntry First{ToOrig - 1, ToOrig - 1, false};
- LBREntry Second{ToOrig, ToOrig, false};
- return doTrace(First, Second, Count);
- }
// Ignore returns.
if (IsReturn)
return true;
@@ -1229,13 +1203,10 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
// return profile into call to continuation fall-through.
auto Type = AggregatedLBREntry::BRANCH;
if (TypeOrErr.get() == "B") {
- NeedsConvertRetProfileToCallCont = true;
Type = AggregatedLBREntry::BRANCH;
} else if (TypeOrErr.get() == "F") {
- NeedsConvertRetProfileToCallCont = true;
Type = AggregatedLBREntry::FT;
} else if (TypeOrErr.get() == "f") {
- NeedsConvertRetProfileToCallCont = true;
Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
} else if (TypeOrErr.get() == "T") {
// Trace is expanded into B and [Ff]
diff --git a/bolt/test/X86/callcont-fallthru.s b/bolt/test/X86/callcont-fallthru.s
index 95cb4c5fc2df4..50eac3bcd73eb 100644
--- a/bolt/test/X86/callcont-fallthru.s
+++ b/bolt/test/X86/callcont-fallthru.s
@@ -4,30 +4,10 @@
# RUN: %clang %cflags -fpic -shared -xc /dev/null -o %t.so
## Link against a DSO to ensure PLT entries.
# RUN: %clangxx %cxxflags %s %t.so -o %t -Wl,-q -nostdlib
-# RUN: link_fdata %s %t %t.pa1 PREAGG1
-# RUN: link_fdata %s %t %t.pa2 PREAGG2
-# RUN: link_fdata %s %t %t.pa3 PREAGG3
# RUN: link_fdata %s %t %t.pat PREAGGT1
# RUN: link_fdata %s %t %t.pat2 PREAGGT2
-## Check normal case: fallthrough is not LP or secondary entry.
# RUN: llvm-strip --strip-unneeded %t -o %t.strip
-# RUN: llvm-objcopy --remove-section=.eh_frame %t.strip %t.noeh
-# RUN: llvm-bolt %t.strip --pa -p %t.pa1 -o %t.out \
-# RUN: --print-cfg --print-only=main | FileCheck %s
-
-## Check that getFallthroughsInTrace correctly handles a trace starting at plt
-## call continuation
-# RUN: llvm-bolt %t.strip --pa -p %t.pa2 -o %t.out2 \
-# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK2
-
-## Check that we don't treat secondary entry points as call continuation sites.
-# RUN: llvm-bolt %t --pa -p %t.pa3 -o %t.out \
-# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK3
-
-## Check fallthrough to a landing pad case.
-# RUN: llvm-bolt %t.strip --pa -p %t.pa3 -o %t.out \
-# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK3
## Check pre-aggregated traces attach call continuation fallthrough count
# RUN: llvm-bolt %t.noeh --pa -p %t.pat -o %t.out \
@@ -67,7 +47,6 @@ main:
movq %rsi, -0x10(%rbp)
callq puts at PLT
## Target is an external-origin call continuation
-# PREAGG1: B X:0 #Ltmp1# 2 0
# PREAGGT1: T X:0 #Ltmp1# #Ltmp4_br# 2
# CHECK: callq puts at PLT
# CHECK-NEXT: count: 2
@@ -87,18 +66,15 @@ Ltmp4_br:
movl $0xa, -0x18(%rbp)
callq foo
## Target is a binary-local call continuation
-# PREAGG1: B #Lfoo_ret# #Ltmp3# 1 0
# PREAGGT1: T #Lfoo_ret# #Ltmp3# #Ltmp3_br# 1
# CHECK: callq foo
# CHECK-NEXT: count: 1
## PLT call continuation fallthrough spanning the call
-# PREAGG2: F #Ltmp1# #Ltmp3_br# 3
# CHECK2: callq foo
# CHECK2-NEXT: count: 3
## Target is a secondary entry point (unstripped) or a landing pad (stripped)
-# PREAGG3: B X:0 #Ltmp3# 2 0
# PREAGGT2: T X:0 #Ltmp3# #Ltmp3_br# 2
# CHECK3: callq foo
# CHECK3-NEXT: count: 0
>From 9a2c69fb58552234cd4a647bf34087d8c8aab6c4 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Sun, 2 Mar 2025 21:29:20 -0800
Subject: [PATCH 2/3] fix test
Created using spr 1.3.4
---
bolt/test/X86/callcont-fallthru.s | 1 +
1 file changed, 1 insertion(+)
diff --git a/bolt/test/X86/callcont-fallthru.s b/bolt/test/X86/callcont-fallthru.s
index 50eac3bcd73eb..8df5ce8794e93 100644
--- a/bolt/test/X86/callcont-fallthru.s
+++ b/bolt/test/X86/callcont-fallthru.s
@@ -8,6 +8,7 @@
# RUN: link_fdata %s %t %t.pat2 PREAGGT2
# RUN: llvm-strip --strip-unneeded %t -o %t.strip
+# RUN: llvm-objcopy --remove-section=.eh_frame %t.strip %t.noeh
## Check pre-aggregated traces attach call continuation fallthrough count
# RUN: llvm-bolt %t.noeh --pa -p %t.pat -o %t.out \
>From 495ee2877469b358b0cf9f268f7f0d0193116703 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Mon, 3 Mar 2025 11:11:45 -0800
Subject: [PATCH 3/3] expect T format
Created using spr 1.3.4
---
bolt/lib/Profile/DataAggregator.cpp | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 62d76598f954e..835b8b5a75408 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1199,16 +1199,14 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
if (std::error_code EC = TypeOrErr.getError())
return EC;
- auto Type = AggregatedLBREntry::BRANCH;
- if (TypeOrErr.get() == "B") {
+ auto Type = AggregatedLBREntry::TRACE;
+ if (LLVM_LIKELY(TypeOrErr.get() == "T")) {
+ } else if (TypeOrErr.get() == "B") {
Type = AggregatedLBREntry::BRANCH;
} else if (TypeOrErr.get() == "F") {
Type = AggregatedLBREntry::FT;
} else if (TypeOrErr.get() == "f") {
Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
- } else if (TypeOrErr.get() == "T") {
- // Trace is expanded into B and [Ff]
- Type = AggregatedLBREntry::TRACE;
} else {
reportError("expected T, B, F or f");
return make_error_code(llvm::errc::io_error);
More information about the llvm-commits
mailing list