[lld] [lld][MachO] Add N_COLD_FUNC support (PR #183909)
Zhaoxuan Jiang via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 4 05:43:05 PST 2026
https://github.com/nocchijiang updated https://github.com/llvm/llvm-project/pull/183909
>From 5d3a9a812b6525e0869ca0a32c47a452f3506768 Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan94 at gmail.com>
Date: Sat, 28 Feb 2026 16:47:07 +0800
Subject: [PATCH 1/7] [lld][MachO] Add N_COLD_FUNC support
Parse N_COLD_FUNC from input object files and use it to move cold
functions to the end of __text, after all non-cold and ordered symbols.
Key behaviors:
- Cold functions are placed after all non-cold unordered symbols.
- Order file entries take precedence over the cold attribute: a cold
function listed in the order file retains its specified position.
- The N_COLD_FUNC flag is stripped from the output symbol table, except
for relocatable (MH_OBJECT) output, where it is preserved.
- ICF interaction: when folding identical cold and non-cold functions,
the non-cold copy is kept as the master so the folded body remains
in the hot region. InputSection::isCold() skips ICF-merged symbols
to prevent a non-cold master from being misidentified as cold after
absorbing cold duplicates via folding.
---
lld/MachO/ICF.cpp | 12 +++-
lld/MachO/InputFiles.cpp | 25 +++++---
lld/MachO/InputSection.cpp | 14 +++++
lld/MachO/InputSection.h | 1 +
lld/MachO/SymbolTable.cpp | 8 ++-
lld/MachO/SymbolTable.h | 3 +-
lld/MachO/Symbols.cpp | 4 +-
lld/MachO/Symbols.h | 5 +-
lld/MachO/SyntheticSections.cpp | 2 +
lld/MachO/Writer.cpp | 24 +++++---
lld/test/MachO/cold-func.s | 103 ++++++++++++++++++++++++++++++++
11 files changed, 176 insertions(+), 25 deletions(-)
create mode 100644 lld/test/MachO/cold-func.s
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index b03e2c5a42e00..913539e08469a 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -419,9 +419,17 @@ void ICF::run() {
// When using safe_thunks, ensure that we first sort by icfEqClass and
// then by keepUnique (descending). This guarantees that within an
// equivalence class, the keepUnique inputs are always first.
- if (config->icfLevel == ICFLevel::safe_thunks)
- if (a->icfEqClass[0] == b->icfEqClass[0])
+ if (a->icfEqClass[0] == b->icfEqClass[0]) {
+ if (config->icfLevel == ICFLevel::safe_thunks &&
+ a->keepUnique != b->keepUnique)
return a->keepUnique > b->keepUnique;
+ // Prefer non-cold sections as the master section to preserve locality
+ // for the non-cold paths.
+ bool aCold = a->isCold();
+ bool bCold = b->isCold();
+ if (aCold != bCold)
+ return !aCold;
+ }
return a->icfEqClass[0] < b->icfEqClass[0];
});
forEachClass([&](size_t begin, size_t end) {
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index cc7eae51175bc..980ce6bbd4292 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -673,6 +673,8 @@ static macho::Symbol *createDefined(const NList &sym, StringRef name,
assert(!(sym.n_desc & N_ARM_THUMB_DEF) && "ARM32 arch is not supported");
+ bool isCold = sym.n_desc & N_COLD_FUNC;
+
if (sym.n_type & N_EXT) {
// -load_hidden makes us treat global symbols as linkage unit scoped.
// Duplicates are reported but the symbol does not go in the export trie.
@@ -716,13 +718,15 @@ static macho::Symbol *createDefined(const NList &sym, StringRef name,
return symtab->addDefined(
name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
isPrivateExtern, sym.n_desc & REFERENCED_DYNAMICALLY,
- sym.n_desc & N_NO_DEAD_STRIP, isWeakDefCanBeHidden);
+ sym.n_desc & N_NO_DEAD_STRIP, isWeakDefCanBeHidden, isCold);
}
bool includeInSymtab = !isPrivateLabel(name) && !isEhFrameSection(isec);
- return make<Defined>(
+ auto *defined = make<Defined>(
name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
/*isExternal=*/false, /*isPrivateExtern=*/false, includeInSymtab,
sym.n_desc & REFERENCED_DYNAMICALLY, sym.n_desc & N_NO_DEAD_STRIP);
+ defined->cold = isCold;
+ return defined;
}
// Absolute symbols are defined symbols that do not have an associated
@@ -730,6 +734,7 @@ static macho::Symbol *createDefined(const NList &sym, StringRef name,
template <class NList>
static macho::Symbol *createAbsolute(const NList &sym, InputFile *file,
StringRef name, bool forceHidden) {
+ bool isCold = sym.n_desc & N_COLD_FUNC;
assert(!(sym.n_desc & N_ARM_THUMB_DEF) && "ARM32 arch is not supported");
if (sym.n_type & N_EXT) {
@@ -738,14 +743,16 @@ static macho::Symbol *createAbsolute(const NList &sym, InputFile *file,
/*isWeakDef=*/false, isPrivateExtern,
/*isReferencedDynamically=*/false,
sym.n_desc & N_NO_DEAD_STRIP,
- /*isWeakDefCanBeHidden=*/false);
+ /*isWeakDefCanBeHidden=*/false, isCold);
}
- return make<Defined>(name, file, nullptr, sym.n_value, /*size=*/0,
- /*isWeakDef=*/false,
- /*isExternal=*/false, /*isPrivateExtern=*/false,
- /*includeInSymtab=*/true,
- /*isReferencedDynamically=*/false,
- sym.n_desc & N_NO_DEAD_STRIP);
+ auto *defined = make<Defined>(name, file, nullptr, sym.n_value, /*size=*/0,
+ /*isWeakDef=*/false,
+ /*isExternal=*/false, /*isPrivateExtern=*/false,
+ /*includeInSymtab=*/true,
+ /*isReferencedDynamically=*/false,
+ sym.n_desc & N_NO_DEAD_STRIP);
+ defined->cold = isCold;
+ return defined;
}
template <class NList>
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 34847adc85954..50b0e512a64a3 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -195,6 +195,20 @@ const Relocation *InputSection::getRelocAt(uint32_t off) const {
return &*it;
}
+bool InputSection::isCold() const {
+ if (!isCodeSection(this))
+ return false;
+ for (const Defined *sym : symbols) {
+ // Skip symbols absorbed from ICF-folded sections so that a non-cold master
+ // is not misidentified as cold after folding a cold duplicate.
+ if (sym->identicalCodeFoldingKind != Symbol::ICFFoldKind::None)
+ continue;
+ if (sym->isCold())
+ return true;
+ }
+ return false;
+}
+
void ConcatInputSection::foldIdentical(ConcatInputSection *copy,
Symbol::ICFFoldKind foldKind) {
align = std::max(align, copy->align);
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index e0a90a2edc0af..c77ea4b543f82 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -62,6 +62,7 @@ class InputSection {
virtual void markLive(uint64_t off) = 0;
virtual InputSection *canonical() { return this; }
virtual const InputSection *canonical() const { return this; }
+ bool isCold() const;
protected:
InputSection(Kind kind, const Section &section, ArrayRef<uint8_t> data,
diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index 84d3e45d64396..6371214fe9c21 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -104,7 +104,7 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
uint64_t size, bool isWeakDef,
bool isPrivateExtern,
bool isReferencedDynamically, bool noDeadStrip,
- bool isWeakDefCanBeHidden) {
+ bool isWeakDefCanBeHidden, bool isCold) {
bool overridesWeakDef = false;
auto [s, wasInserted] = insert(name, file);
@@ -119,6 +119,7 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
defined->weakDefCanBeHidden &= isWeakDefCanBeHidden;
defined->referencedDynamically |= isReferencedDynamically;
defined->noDeadStrip |= noDeadStrip;
+ defined->cold |= isCold;
}
if (auto concatIsec = dyn_cast_or_null<ConcatInputSection>(isec)) {
concatIsec->wasCoalesced = true;
@@ -211,7 +212,8 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
Defined *defined = replaceSymbol<Defined>(
s, name, file, isec, value, size, isWeakDef, /*isExternal=*/true,
isPrivateExtern, /*includeInSymtab=*/true, isReferencedDynamically,
- noDeadStrip, overridesWeakDef, isWeakDefCanBeHidden, interposable);
+ noDeadStrip, overridesWeakDef, isWeakDefCanBeHidden, interposable,
+ isCold);
return defined;
}
@@ -221,7 +223,7 @@ Defined *SymbolTable::aliasDefined(Defined *src, StringRef target,
return addDefined(target, newFile, src->isec(), src->value, src->size,
src->isWeakDef(), isPrivateExtern,
src->referencedDynamically, src->noDeadStrip,
- src->weakDefCanBeHidden);
+ src->weakDefCanBeHidden, src->cold);
}
Symbol *SymbolTable::addUndefined(StringRef name, InputFile *file,
diff --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h
index e9b03da9d2549..fd87b7891a105 100644
--- a/lld/MachO/SymbolTable.h
+++ b/lld/MachO/SymbolTable.h
@@ -39,7 +39,8 @@ class SymbolTable {
Defined *addDefined(StringRef name, InputFile *, InputSection *,
uint64_t value, uint64_t size, bool isWeakDef,
bool isPrivateExtern, bool isReferencedDynamically,
- bool noDeadStrip, bool isWeakDefCanBeHidden);
+ bool noDeadStrip, bool isWeakDefCanBeHidden,
+ bool isCold = false);
Defined *aliasDefined(Defined *src, StringRef target, InputFile *newFile,
bool makePrivateExtern = false);
diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
index 9faf01e09de05..27419caf9de1e 100644
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -57,13 +57,13 @@ Defined::Defined(StringRef name, InputFile *file, InputSection *isec,
bool isPrivateExtern, bool includeInSymtab,
bool isReferencedDynamically, bool noDeadStrip,
bool canOverrideWeakDef, bool isWeakDefCanBeHidden,
- bool interposable)
+ bool interposable, bool cold)
: Symbol(DefinedKind, name, file), overridesWeakDef(canOverrideWeakDef),
privateExtern(isPrivateExtern), includeInSymtab(includeInSymtab),
identicalCodeFoldingKind(ICFFoldKind::None),
referencedDynamically(isReferencedDynamically), noDeadStrip(noDeadStrip),
interposable(interposable), weakDefCanBeHidden(isWeakDefCanBeHidden),
- weakDef(isWeakDef), external(isExternal), originalIsec(isec),
+ cold(cold), weakDef(isWeakDef), external(isExternal), originalIsec(isec),
value(value), size(size) {
if (isec) {
isec->symbols.push_back(this);
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index beb97b35bf881..a51b02ca12cac 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -118,7 +118,7 @@ class Defined : public Symbol {
uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern,
bool includeInSymtab, bool isReferencedDynamically, bool noDeadStrip,
bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false,
- bool interposable = false);
+ bool interposable = false, bool cold = false);
bool isWeakDef() const override { return weakDef; }
bool isExternalWeakDef() const {
@@ -128,6 +128,7 @@ class Defined : public Symbol {
bool isExternal() const { return external; }
bool isAbsolute() const { return originalIsec == nullptr; }
+ bool isCold() const { return cold; }
uint64_t getVA() const override;
@@ -178,6 +179,8 @@ class Defined : public Symbol {
bool weakDefCanBeHidden : 1;
+ bool cold : 1;
+
private:
const bool weakDef : 1;
const bool external : 1;
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 36d15419a1091..5257433246d6b 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1445,6 +1445,8 @@ template <class LP> void SymtabSectionImpl<LP>::writeTo(uint8_t *buf) const {
nList->n_desc |= defined->isExternalWeakDef() ? N_WEAK_DEF : 0;
nList->n_desc |=
defined->referencedDynamically ? REFERENCED_DYNAMICALLY : 0;
+ if (config->outputType == MH_OBJECT)
+ nList->n_desc |= defined->isCold() ? N_COLD_FUNC : 0;
} else if (auto *dysym = dyn_cast<DylibSymbol>(entry.sym)) {
uint16_t n_desc = nList->n_desc;
int16_t ordinal = ordinalForDylibSymbol(*dysym);
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index f9fd12a13dba3..7b43d6de9f119 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -1007,13 +1007,23 @@ static void sortSegmentsAndSections() {
osec->align = tlvAlign;
}
- if (!isecPriorities.empty()) {
- if (auto *merged = dyn_cast<ConcatOutputSection>(osec)) {
- llvm::stable_sort(
- merged->inputs, [&](InputSection *a, InputSection *b) {
- return isecPriorities.lookup(a) < isecPriorities.lookup(b);
- });
- }
+ if (auto *merged = dyn_cast<ConcatOutputSection>(osec)) {
+ llvm::stable_sort(merged->inputs,
+ [&](InputSection *a, InputSection *b) {
+ auto aIt = isecPriorities.find(a);
+ auto bIt = isecPriorities.find(b);
+ bool aHasPriority = aIt != isecPriorities.end();
+ bool bHasPriority = bIt != isecPriorities.end();
+ if (aHasPriority != bHasPriority)
+ return aHasPriority;
+ if (aHasPriority)
+ return aIt->second < bIt->second;
+ bool aCold = a->isCold();
+ bool bCold = b->isCold();
+ if (aCold != bCold)
+ return !aCold;
+ return false;
+ });
}
}
}
diff --git a/lld/test/MachO/cold-func.s b/lld/test/MachO/cold-func.s
new file mode 100644
index 0000000000000..27810bde33e2c
--- /dev/null
+++ b/lld/test/MachO/cold-func.s
@@ -0,0 +1,103 @@
+# REQUIRES: aarch64
+# RUN: rm -rf %t; split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/test.s -o %t.o
+# RUN: %lld -arch arm64 -lSystem %t.o -o %t.noorder.exe
+# RUN: llvm-objdump -d %t.noorder.exe | FileCheck %s --check-prefix=NOORDER
+# RUN: %lld -arch arm64 -lSystem %t.o -o %t.order-hot.exe -order_file %t/ord-hot
+# RUN: llvm-objdump -d %t.order-hot.exe | FileCheck %s --check-prefix=ORDER-HOT
+# RUN: %lld -arch arm64 -lSystem %t.o -o %t.order-cold.exe -order_file %t/ord-cold
+# RUN: llvm-objdump -d %t.order-cold.exe | FileCheck %s --check-prefix=ORDER-COLD
+# RUN: %lld -arch arm64 -lSystem %t.o -o %t.exe
+# RUN: llvm-objdump --syms %t.exe | FileCheck %s --check-prefix=EXE
+# RUN: %lld -arch arm64 -lSystem --icf=all %t.o -o %t.icf.exe -map %t/icf.map
+# RUN: FileCheck %s --input-file %t/icf.map --check-prefix=ICF
+# RUN: %lld -arch arm64 -lSystem --icf=safe_thunks %t.o -o %t.safe.exe -map %t/safe.map
+# RUN: FileCheck %s --input-file %t/safe.map --check-prefix=SAFE-THUNKS
+
+#--- test.s
+.subsections_via_symbols
+
+## Mark _cold and _normal as address-significant for safe_thunks testing.
+.addrsig
+.addrsig_sym _cold
+.addrsig_sym _normal
+
+.text
+
+.globl _cold
+.p2align 2
+.desc _cold, 0x400
+_cold:
+ add x0, x1, x2
+ add x3, x4, x5
+ ret
+
+.globl _normal
+.p2align 2
+_normal:
+ add x0, x1, x2
+ add x3, x4, x5
+ ret
+
+.globl _ordered
+.p2align 2
+_ordered:
+ add x0, x1, x2
+ add x3, x4, x5
+ ret
+
+.globl _main
+.p2align 2
+_main:
+ bl _normal
+ bl _cold
+ bl _ordered
+ ret
+
+## Basic N_COLD_FUNC support.
+# NOORDER: <_normal>:
+# NOORDER: <_ordered>:
+# NOORDER: <_main>:
+# NOORDER: <_cold>:
+
+## Ordered symbols should come before unordered cold symbols.
+# ORDER-HOT: <_ordered>:
+# ORDER-HOT: <_normal>:
+# ORDER-HOT: <_main>:
+# ORDER-HOT: <_cold>:
+
+## Cold attribute should not change the ordering of order-file symbols.
+# ORDER-COLD: <_cold>:
+# ORDER-COLD: <_ordered>:
+# ORDER-COLD: <_normal>:
+# ORDER-COLD: <_main>:
+
+## Check that N_COLD_FUNC is NOT preserved in the output executable.
+# EXE: SYMBOL TABLE:
+# EXE-NOT: 0400 {{.*}} _cold
+# EXE: {{.*}} g F __TEXT,__text _cold
+
+## ICF + N_COLD_FUNC: _cold, _normal, and _ordered have identical bodies.
+## After folding, the non-cold copy should be the master so the folded body
+## is not the cold region (after _main).
+# ICF-LABEL: # Symbols:
+# ICF-DAG: _normal
+# ICF-DAG: _cold
+# ICF-DAG: _ordered
+# ICF: _main
+
+## With safe_thunks, _cold and _normal are keepUnique. The non-cold _normal
+## should be chosen as the master, appearing before _main. _cold gets a thunk
+## placed in the cold region.
+# SAFE-THUNKS-LABEL: # Symbols:
+# SAFE-THUNKS: 0x0000000C {{.*}} _normal
+# SAFE-THUNKS: 0x00000000 {{.*}} _ordered
+# SAFE-THUNKS: 0x00000010 {{.*}} _main
+# SAFE-THUNKS: 0x00000004 {{.*}} _cold
+
+#--- ord-hot
+_ordered
+
+#--- ord-cold
+_cold
+_ordered
>From 924b7bcb774f9c11fa78ece147edc95a4f03075d Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan at bytedance.com>
Date: Mon, 2 Mar 2026 15:23:33 +0800
Subject: [PATCH 2/7] fix symtab check
---
lld/test/MachO/cold-func.s | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/lld/test/MachO/cold-func.s b/lld/test/MachO/cold-func.s
index 27810bde33e2c..ed8aacf6b3b02 100644
--- a/lld/test/MachO/cold-func.s
+++ b/lld/test/MachO/cold-func.s
@@ -8,7 +8,7 @@
# RUN: %lld -arch arm64 -lSystem %t.o -o %t.order-cold.exe -order_file %t/ord-cold
# RUN: llvm-objdump -d %t.order-cold.exe | FileCheck %s --check-prefix=ORDER-COLD
# RUN: %lld -arch arm64 -lSystem %t.o -o %t.exe
-# RUN: llvm-objdump --syms %t.exe | FileCheck %s --check-prefix=EXE
+# RUN: llvm-nm -m %t.exe | FileCheck %s --check-prefix=EXE
# RUN: %lld -arch arm64 -lSystem --icf=all %t.o -o %t.icf.exe -map %t/icf.map
# RUN: FileCheck %s --input-file %t/icf.map --check-prefix=ICF
# RUN: %lld -arch arm64 -lSystem --icf=safe_thunks %t.o -o %t.safe.exe -map %t/safe.map
@@ -73,9 +73,7 @@ _main:
# ORDER-COLD: <_main>:
## Check that N_COLD_FUNC is NOT preserved in the output executable.
-# EXE: SYMBOL TABLE:
-# EXE-NOT: 0400 {{.*}} _cold
-# EXE: {{.*}} g F __TEXT,__text _cold
+# EXE: (__TEXT,__text) external _cold
## ICF + N_COLD_FUNC: _cold, _normal, and _ordered have identical bodies.
## After folding, the non-cold copy should be the master so the folded body
>From ef811eebc79554a84ce839a62534cbc50c2ba95c Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan at bytedance.com>
Date: Mon, 2 Mar 2026 15:52:28 +0800
Subject: [PATCH 3/7] test weakdef + cold interaction
---
lld/MachO/SymbolTable.cpp | 2 ++
lld/test/MachO/cold-func-weak-merge.s | 52 +++++++++++++++++++++++++++
2 files changed, 54 insertions(+)
create mode 100644 lld/test/MachO/cold-func-weak-merge.s
diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index 6371214fe9c21..f70bb9732081a 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -119,6 +119,8 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
defined->weakDefCanBeHidden &= isWeakDefCanBeHidden;
defined->referencedDynamically |= isReferencedDynamically;
defined->noDeadStrip |= noDeadStrip;
+ // If either weak definition is cold, the merged symbol is cold.
+ // This matches the behavior of both ld-prime and ld64.
defined->cold |= isCold;
}
if (auto concatIsec = dyn_cast_or_null<ConcatInputSection>(isec)) {
diff --git a/lld/test/MachO/cold-func-weak-merge.s b/lld/test/MachO/cold-func-weak-merge.s
new file mode 100644
index 0000000000000..3c41e9cd6abd7
--- /dev/null
+++ b/lld/test/MachO/cold-func-weak-merge.s
@@ -0,0 +1,52 @@
+# REQUIRES: aarch64
+
+## Test that when two weak definitions of the same symbol exist and either one
+## is marked N_COLD_FUNC, the merged symbol is treated as cold. This matches the
+## behavior of both ld-prime and ld64.
+
+# RUN: rm -rf %t; split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/weak-cold.s -o %t/weak-cold.o
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/weak-noncold.s -o %t/weak-noncold.o
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/weak-main.s -o %t/weak-main.o
+
+## Link order: non-cold first, cold second. The non-cold def wins but cold |= true.
+# RUN: %lld -arch arm64 -lSystem %t/weak-noncold.o %t/weak-cold.o %t/weak-main.o -o %t/weak-nc-c.exe
+# RUN: llvm-objdump -d %t/weak-nc-c.exe | FileCheck %s
+
+## Link order: cold first, non-cold second. The cold def wins and cold |= false.
+# RUN: %lld -arch arm64 -lSystem %t/weak-cold.o %t/weak-noncold.o %t/weak-main.o -o %t/weak-c-nc.exe
+# RUN: llvm-objdump -d %t/weak-c-nc.exe | FileCheck %s
+
+## In both link orders, _weakfn ends up cold and is placed after _main.
+# CHECK: <_main>:
+# CHECK: <_weakfn>:
+
+#--- weak-cold.s
+.subsections_via_symbols
+.text
+.globl _weakfn
+.weak_definition _weakfn
+.p2align 2
+.desc _weakfn, 0x400
+_weakfn:
+ add x0, x1, x2
+ ret
+
+#--- weak-noncold.s
+.subsections_via_symbols
+.text
+.globl _weakfn
+.weak_definition _weakfn
+.p2align 2
+_weakfn:
+ add x0, x1, x2
+ ret
+
+#--- weak-main.s
+.subsections_via_symbols
+.text
+.globl _main
+.p2align 2
+_main:
+ bl _weakfn
+ ret
>From f7d6f714d476f0c4f37a3e177a617ec6875801a0 Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan at bytedance.com>
Date: Mon, 2 Mar 2026 20:50:36 +0800
Subject: [PATCH 4/7] add comment for Defined::cold
---
lld/MachO/Symbols.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index a51b02ca12cac..9fc7d6b079058 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -179,6 +179,8 @@ class Defined : public Symbol {
bool weakDefCanBeHidden : 1;
+ // Whether this symbol has the N_COLD_FUNC nlist flag set. Populated from the
+ // symbol table of input object files.
bool cold : 1;
private:
>From 51973a389c99c48ac835a1cb44139ed4c6f4cc4a Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan at bytedance.com>
Date: Wed, 4 Mar 2026 20:50:08 +0800
Subject: [PATCH 5/7] cache coldness and simplify ICF interaction
---
lld/MachO/Driver.cpp | 14 +++++++++
lld/MachO/ICF.cpp | 17 +++++------
lld/MachO/InputSection.cpp | 14 ---------
lld/MachO/InputSection.h | 16 ++++++++---
lld/MachO/SectionPriorities.cpp | 5 +++-
lld/MachO/Writer.cpp | 25 ++++++-----------
lld/test/MachO/cold-func.s | 50 +++++++++++++++++++++++++--------
7 files changed, 84 insertions(+), 57 deletions(-)
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 58fbe64c2d1f9..c318b3abe94f8 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -31,6 +31,7 @@
#include "lld/Common/Reproduce.h"
#include "lld/Common/Version.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MachO.h"
@@ -1706,6 +1707,17 @@ static SmallVector<StringRef, 0> getAllowableClients(opt::InputArgList &args) {
return vals;
}
+static void computeColdness() {
+ TimeTraceScope timeScope("Compute coldness");
+ for (InputSection *isec : inputSections) {
+ if (!isCodeSection(isec))
+ continue;
+ isec->isCold = llvm::any_of(isec->symbols, [](Defined *sym) {
+ return sym->isCold();
+ });
+ }
+}
+
namespace lld {
namespace macho {
bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
@@ -2417,6 +2429,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
false))
objc::mergeCategories();
+ computeColdness();
+
// ICF assumes that all literals have been folded already, so we must run
// foldIdenticalLiterals before foldIdenticalSections.
foldIdenticalLiterals();
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index 913539e08469a..a02166d186b81 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -337,6 +337,8 @@ void ICF::applySafeThunksToRange(size_t begin, size_t end) {
ConcatInputSection *thunk =
makeSyntheticInputSection(isec->getSegName(), isec->getName());
+ // A thunk-folded cold function has a cold thunk.
+ thunk->isCold = isec->isCold;
addInputSection(thunk);
target->initICFSafeThunkBody(thunk, masterSym);
@@ -419,17 +421,9 @@ void ICF::run() {
// When using safe_thunks, ensure that we first sort by icfEqClass and
// then by keepUnique (descending). This guarantees that within an
// equivalence class, the keepUnique inputs are always first.
- if (a->icfEqClass[0] == b->icfEqClass[0]) {
- if (config->icfLevel == ICFLevel::safe_thunks &&
- a->keepUnique != b->keepUnique)
+ if (config->icfLevel == ICFLevel::safe_thunks)
+ if (a->icfEqClass[0] == b->icfEqClass[0])
return a->keepUnique > b->keepUnique;
- // Prefer non-cold sections as the master section to preserve locality
- // for the non-cold paths.
- bool aCold = a->isCold();
- bool bCold = b->isCold();
- if (aCold != bCold)
- return !aCold;
- }
return a->icfEqClass[0] < b->icfEqClass[0];
});
forEachClass([&](size_t begin, size_t end) {
@@ -479,6 +473,9 @@ void ICF::run() {
continue;
}
beginIsec->foldIdentical(icfInputs[i]);
+ // Make sure we don't fold hot code into cold regions.
+ if (!icfInputs[i]->isCold)
+ beginIsec->isCold = false;
}
});
}
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 50b0e512a64a3..34847adc85954 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -195,20 +195,6 @@ const Relocation *InputSection::getRelocAt(uint32_t off) const {
return &*it;
}
-bool InputSection::isCold() const {
- if (!isCodeSection(this))
- return false;
- for (const Defined *sym : symbols) {
- // Skip symbols absorbed from ICF-folded sections so that a non-cold master
- // is not misidentified as cold after folding a cold duplicate.
- if (sym->identicalCodeFoldingKind != Symbol::ICFFoldKind::None)
- continue;
- if (sym->isCold())
- return true;
- }
- return false;
-}
-
void ConcatInputSection::foldIdentical(ConcatInputSection *copy,
Symbol::ICFFoldKind foldKind) {
align = std::max(align, copy->align);
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index c77ea4b543f82..494ddd40c34d3 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -62,17 +62,17 @@ class InputSection {
virtual void markLive(uint64_t off) = 0;
virtual InputSection *canonical() { return this; }
virtual const InputSection *canonical() const { return this; }
- bool isCold() const;
protected:
InputSection(Kind kind, const Section &section, ArrayRef<uint8_t> data,
uint32_t align)
- : sectionKind(kind), keepUnique(false), hasAltEntry(false), align(align),
- data(data), section(section) {}
+ : sectionKind(kind), keepUnique(false), hasAltEntry(false), isCold(false),
+ align(align), data(data), section(section) {}
InputSection(const InputSection &rhs)
: sectionKind(rhs.sectionKind), keepUnique(false), hasAltEntry(false),
- align(rhs.align), data(rhs.data), section(rhs.section) {}
+ isCold(rhs.isCold), align(rhs.align), data(rhs.data),
+ section(rhs.section) {}
Kind sectionKind;
@@ -85,6 +85,14 @@ class InputSection {
// Does this section have symbols at offsets other than zero? (NOTE: only
// applies to ConcatInputSections.)
bool hasAltEntry : 1;
+ // Is this considered cold? Computed before ICF. Currently reflects whether
+ // any symbol in the section has the N_COLD_FUNC nlist flag set. Cold
+ // sections are placed at the end of their containing output section to
+ // improve locality of non-cold input sections. When a section is given an
+ // explicit priority (via order file, --bp-startup-sort, or
+ // --bp-compression-sort), this flag is unset so that the priority-based
+ // ordering takes precedence over cold partitioning.
+ bool isCold : 1;
uint32_t align = 1;
OutputSection *parent = nullptr;
diff --git a/lld/MachO/SectionPriorities.cpp b/lld/MachO/SectionPriorities.cpp
index 818418e8aa29d..161724f097f59 100644
--- a/lld/MachO/SectionPriorities.cpp
+++ b/lld/MachO/SectionPriorities.cpp
@@ -399,8 +399,11 @@ macho::PriorityBuilder::buildInputSectionPriorities() {
std::optional<int> symbolPriority = getSymbolPriority(sym);
if (!symbolPriority)
return;
- int &priority = sectionPriorities[sym->isec()];
+ auto *isec = sym->isec();
+ int &priority = sectionPriorities[isec];
priority = std::min(priority, *symbolPriority);
+ // Order file takes precedence over cold partitioning.
+ isec->isCold = false;
};
// TODO: Make sure this handles weak symbols correctly.
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 7b43d6de9f119..08ed56d6342ec 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -1008,22 +1008,15 @@ static void sortSegmentsAndSections() {
}
if (auto *merged = dyn_cast<ConcatOutputSection>(osec)) {
- llvm::stable_sort(merged->inputs,
- [&](InputSection *a, InputSection *b) {
- auto aIt = isecPriorities.find(a);
- auto bIt = isecPriorities.find(b);
- bool aHasPriority = aIt != isecPriorities.end();
- bool bHasPriority = bIt != isecPriorities.end();
- if (aHasPriority != bHasPriority)
- return aHasPriority;
- if (aHasPriority)
- return aIt->second < bIt->second;
- bool aCold = a->isCold();
- bool bCold = b->isCold();
- if (aCold != bCold)
- return !aCold;
- return false;
- });
+ auto coldIt = std::stable_partition(merged->inputs.begin(), merged->inputs.end(), [](InputSection *isec) {
+ return !isec->isCold;
+ });
+ if (!isecPriorities.empty()) {
+ std::stable_sort(
+ merged->inputs.begin(), coldIt, [&](InputSection *a, InputSection *b) {
+ return isecPriorities.lookup(a) < isecPriorities.lookup(b);
+ });
+ }
}
}
}
diff --git a/lld/test/MachO/cold-func.s b/lld/test/MachO/cold-func.s
index ed8aacf6b3b02..09344d5579503 100644
--- a/lld/test/MachO/cold-func.s
+++ b/lld/test/MachO/cold-func.s
@@ -17,9 +17,9 @@
#--- test.s
.subsections_via_symbols
-## Mark _cold and _normal as address-significant for safe_thunks testing.
.addrsig
.addrsig_sym _cold
+.addrsig_sym _cold_addrsig
.addrsig_sym _normal
.text
@@ -46,12 +46,29 @@ _ordered:
add x3, x4, x5
ret
+.globl _cold_addrsig
+.p2align 2
+.desc _cold_addrsig, 0x400
+_cold_addrsig:
+ add x0, x1, x2
+ add x3, x4, x5
+ ret
+
+.globl _cold_unordered
+.p2align 2
+.desc _cold_unordered, 0x400
+_cold_unordered:
+ add x0, x1, x3
+ ret
+
.globl _main
.p2align 2
_main:
bl _normal
bl _cold
+ bl _cold_addrsig
bl _ordered
+ bl _cold_unordered
ret
## Basic N_COLD_FUNC support.
@@ -59,39 +76,48 @@ _main:
# NOORDER: <_ordered>:
# NOORDER: <_main>:
# NOORDER: <_cold>:
+# NOORDER: <_cold_unordered>:
## Ordered symbols should come before unordered cold symbols.
# ORDER-HOT: <_ordered>:
# ORDER-HOT: <_normal>:
# ORDER-HOT: <_main>:
# ORDER-HOT: <_cold>:
+# ORDER-HOT: <_cold_unordered>:
## Cold attribute should not change the ordering of order-file symbols.
# ORDER-COLD: <_cold>:
# ORDER-COLD: <_ordered>:
# ORDER-COLD: <_normal>:
# ORDER-COLD: <_main>:
+# ORDER-COLD: <_cold_unordered>:
## Check that N_COLD_FUNC is NOT preserved in the output executable.
# EXE: (__TEXT,__text) external _cold
## ICF + N_COLD_FUNC: _cold, _normal, and _ordered have identical bodies.
-## After folding, the non-cold copy should be the master so the folded body
-## is not the cold region (after _main).
+## _cold is the master; since _normal (non-cold) is folded into _cold,
+## _cold's isCold is unset and it stays in the hot region.
+## _cold_unordered has a different body and stays cold.
# ICF-LABEL: # Symbols:
-# ICF-DAG: _normal
-# ICF-DAG: _cold
-# ICF-DAG: _ordered
+# ICF: _cold
+# ICF: _normal
+# ICF: _ordered
# ICF: _main
+# ICF: _cold_unordered
-## With safe_thunks, _cold and _normal are keepUnique. The non-cold _normal
-## should be chosen as the master, appearing before _main. _cold gets a thunk
-## placed in the cold region.
+## With safe_thunks, _cold, _cold_addrsig, and _normal are keepUnique. _cold
+## appears first in input order so it becomes the master. Since _normal
+## (non-cold) is folded into _cold, _cold's isCold is unset. _normal gets a
+## non-cold thunk. _cold_addrsig gets a cold thunk. _cold_unordered stays cold
+## and anchors the cold region.
# SAFE-THUNKS-LABEL: # Symbols:
-# SAFE-THUNKS: 0x0000000C {{.*}} _normal
+# SAFE-THUNKS: 0x0000000C {{.*}} _cold
# SAFE-THUNKS: 0x00000000 {{.*}} _ordered
-# SAFE-THUNKS: 0x00000010 {{.*}} _main
-# SAFE-THUNKS: 0x00000004 {{.*}} _cold
+# SAFE-THUNKS: {{.*}} _main
+# SAFE-THUNKS: 0x00000004 {{.*}} _normal
+# SAFE-THUNKS: {{.*}} _cold_unordered
+# SAFE-THUNKS: 0x00000004 {{.*}} _cold_addrsig
#--- ord-hot
_ordered
>From 90a2a211e5ad5ad2e64806173cc7323d67a44658 Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan at bytedance.com>
Date: Wed, 4 Mar 2026 21:19:54 +0800
Subject: [PATCH 6/7] BP + N_COLD_FUNC interaction
---
lld/ELF/BPSectionOrderer.cpp | 3 +
lld/MachO/BPSectionOrderer.cpp | 18 ++-
.../lld/Common/BPSectionOrdererBase.inc | 57 +++++--
lld/test/MachO/bp-section-orderer-cold.s | 145 ++++++++++++++++++
4 files changed, 210 insertions(+), 13 deletions(-)
create mode 100644 lld/test/MachO/bp-section-orderer-cold.s
diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
index 38f15b93050c9..ace6b42a7c47b 100644
--- a/lld/ELF/BPSectionOrderer.cpp
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -32,6 +32,9 @@ struct BPOrdererELF : lld::BPOrderer<BPOrdererELF> {
static bool isCodeSection(const Section &sec) {
return sec.flags & ELF::SHF_EXECINSTR;
}
+ // ELF handles cold functions via separate output sections (.text.unlikely,
+ // .text.split), so no cold splitting is needed within BP.
+ static bool isColdSection(const Section &sec) { return false; }
ArrayRef<Defined *> getSymbols(const Section &sec) {
auto it = secToSym.find(&sec);
if (it == secToSym.end())
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index c9c6c1c62bdf9..9700b4fedcad0 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -34,6 +34,7 @@ struct BPOrdererMachO : lld::BPOrderer<BPOrdererMachO> {
static bool isCodeSection(const Section &sec) {
return macho::isCodeSection(&sec);
}
+ static bool isColdSection(const Section &sec) { return sec.isCold; }
static ArrayRef<Defined *> getSymbols(const Section &sec) {
return sec.symbols;
}
@@ -140,8 +141,17 @@ DenseMap<const InputSection *, int> lld::macho::runBalancedPartitioning(
}
}
- return BPOrdererMachO().computeOrder(profilePath, forFunctionCompression,
- forDataCompression,
- compressionSortStartupFunctions, verbose,
- sections, rootSymbolToSectionIdxs);
+ auto result = BPOrdererMachO().computeOrder(
+ profilePath, forFunctionCompression, forDataCompression,
+ compressionSortStartupFunctions, verbose, sections,
+ rootSymbolToSectionIdxs);
+ // BP already orders cold sections after non-cold via separate buckets.
+ // Unset isCold on sections that received a BP priority so Writer.cpp's
+ // stable_partition doesn't re-partition them. Sections without a BP priority
+ // (e.g. non-startup cold sections when only --bp-startup-sort is used) keep
+ // their isCold flag for Writer.cpp to handle.
+ for (auto *isec : sections)
+ if (result.contains(isec))
+ isec->isCold = false;
+ return result;
}
diff --git a/lld/include/lld/Common/BPSectionOrdererBase.inc b/lld/include/lld/Common/BPSectionOrdererBase.inc
index 7d13cd25c0e76..f9a40f033216e 100644
--- a/lld/include/lld/Common/BPSectionOrdererBase.inc
+++ b/lld/include/lld/Common/BPSectionOrdererBase.inc
@@ -222,14 +222,18 @@ auto BPOrderer<D>::computeOrder(
}
SmallVector<unsigned> sectionIdxsForFunctionCompression,
- sectionIdxsForDataCompression;
+ sectionIdxsForColdFunctionCompression, sectionIdxsForDataCompression;
for (unsigned sectionIdx = 0; sectionIdx < sections.size(); sectionIdx++) {
if (startupSectionIdxUNs.contains(sectionIdx))
continue;
const auto *isec = sections[sectionIdx];
if (D::isCodeSection(*isec)) {
- if (forFunctionCompression)
- sectionIdxsForFunctionCompression.push_back(sectionIdx);
+ if (forFunctionCompression) {
+ if (D::isColdSection(*isec))
+ sectionIdxsForColdFunctionCompression.push_back(sectionIdx);
+ else
+ sectionIdxsForFunctionCompression.push_back(sectionIdx);
+ }
} else {
if (forDataCompression)
sectionIdxsForDataCompression.push_back(sectionIdx);
@@ -258,16 +262,21 @@ auto BPOrderer<D>::computeOrder(
auto unsForFunctionCompression = getUnsForCompression<D>(
sections, sectionToIdx, sectionIdxsForFunctionCompression,
&duplicateSectionIdxs, maxUN);
+ auto unsForColdFunctionCompression = getUnsForCompression<D>(
+ sections, sectionToIdx, sectionIdxsForColdFunctionCompression,
+ &duplicateSectionIdxs, maxUN);
auto unsForDataCompression = getUnsForCompression<D>(
sections, sectionToIdx, sectionIdxsForDataCompression,
&duplicateSectionIdxs, maxUN);
std::vector<BPFunctionNode> nodesForStartup, nodesForFunctionCompression,
- nodesForDataCompression;
+ nodesForColdFunctionCompression, nodesForDataCompression;
for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
nodesForStartup.emplace_back(sectionIdx, uns);
for (auto &[sectionIdx, uns] : unsForFunctionCompression)
nodesForFunctionCompression.emplace_back(sectionIdx, uns);
+ for (auto &[sectionIdx, uns] : unsForColdFunctionCompression)
+ nodesForColdFunctionCompression.emplace_back(sectionIdx, uns);
for (auto &[sectionIdx, uns] : unsForDataCompression)
nodesForDataCompression.emplace_back(sectionIdx, uns);
@@ -280,6 +289,8 @@ auto BPOrderer<D>::computeOrder(
// input linker order tends to be not bad.
llvm::sort(nodesForFunctionCompression,
[](auto &L, auto &R) { return L.Id < R.Id; });
+ llvm::sort(nodesForColdFunctionCompression,
+ [](auto &L, auto &R) { return L.Id < R.Id; });
llvm::sort(nodesForDataCompression,
[](auto &L, auto &R) { return L.Id < R.Id; });
@@ -289,12 +300,15 @@ auto BPOrderer<D>::computeOrder(
BalancedPartitioning bp(config);
bp.run(nodesForStartup);
bp.run(nodesForFunctionCompression);
+ bp.run(nodesForColdFunctionCompression);
bp.run(nodesForDataCompression);
}
unsigned numStartupSections = 0, startupSize = 0;
unsigned numCodeCompressionSections = 0, codeCompressionSize = 0;
unsigned numDuplicateCodeSections = 0, duplicateCodeSize = 0;
+ unsigned numColdCodeCompressionSections = 0, coldCodeCompressionSize = 0;
+ unsigned numDuplicateColdCodeSections = 0, duplicateColdCodeSize = 0;
unsigned numDataCompressionSections = 0, dataCompressionSize = 0;
unsigned numDuplicateDataSections = 0, duplicateDataSize = 0;
SetVector<const Section *> orderedSections;
@@ -306,7 +320,7 @@ auto BPOrderer<D>::computeOrder(
++numStartupSections;
}
}
- // then functions for compression,
+ // then non-cold functions for compression,
for (auto &node : nodesForFunctionCompression) {
const auto *isec = sections[node.Id];
if (orderedSections.insert(isec)) {
@@ -324,6 +338,24 @@ auto BPOrderer<D>::computeOrder(
}
}
}
+ // then cold functions for compression,
+ for (auto &node : nodesForColdFunctionCompression) {
+ const auto *isec = sections[node.Id];
+ if (orderedSections.insert(isec)) {
+ coldCodeCompressionSize += D::getSize(*isec);
+ ++numColdCodeCompressionSections;
+ }
+ auto It = duplicateSectionIdxs.find(node.Id);
+ if (It == duplicateSectionIdxs.end())
+ continue;
+ for (auto dupSecIdx : It->getSecond()) {
+ const auto *dupIsec = sections[dupSecIdx];
+ if (orderedSections.insert(dupIsec)) {
+ duplicateColdCodeSize += D::getSize(*dupIsec);
+ ++numDuplicateColdCodeSections;
+ }
+ }
+ }
// then data for compression.
for (auto &node : nodesForDataCompression) {
const auto *isec = sections[node.Id];
@@ -346,11 +378,13 @@ auto BPOrderer<D>::computeOrder(
if (verbose) {
unsigned numTotalOrderedSections =
numStartupSections + numCodeCompressionSections +
- numDuplicateCodeSections + numDataCompressionSections +
+ numDuplicateCodeSections + numColdCodeCompressionSections +
+ numDuplicateColdCodeSections + numDataCompressionSections +
numDuplicateDataSections;
- unsigned totalOrderedSize = startupSize + codeCompressionSize +
- duplicateCodeSize + dataCompressionSize +
- duplicateDataSize;
+ unsigned totalOrderedSize =
+ startupSize + codeCompressionSize + duplicateCodeSize +
+ coldCodeCompressionSize + duplicateColdCodeSize +
+ dataCompressionSize + duplicateDataSize;
dbgs() << "Ordered " << numTotalOrderedSections << " sections ("
<< totalOrderedSize << " bytes) using balanced partitioning:\n";
dbgs() << " Functions for startup: " << numStartupSections << " ("
@@ -359,6 +393,11 @@ auto BPOrderer<D>::computeOrder(
<< " (" << codeCompressionSize << " bytes)\n";
dbgs() << " Duplicate functions: " << numDuplicateCodeSections << " ("
<< duplicateCodeSize << " bytes)\n";
+ dbgs() << " Cold functions for compression: "
+ << numColdCodeCompressionSections << " ("
+ << coldCodeCompressionSize << " bytes)\n";
+ dbgs() << " Duplicate cold functions: " << numDuplicateColdCodeSections
+ << " (" << duplicateColdCodeSize << " bytes)\n";
dbgs() << " Data for compression: " << numDataCompressionSections << " ("
<< dataCompressionSize << " bytes)\n";
dbgs() << " Duplicate data: " << numDuplicateDataSections << " ("
diff --git a/lld/test/MachO/bp-section-orderer-cold.s b/lld/test/MachO/bp-section-orderer-cold.s
new file mode 100644
index 0000000000000..8fcc6625f1af2
--- /dev/null
+++ b/lld/test/MachO/bp-section-orderer-cold.s
@@ -0,0 +1,145 @@
+# REQUIRES: aarch64
+
+# RUN: rm -rf %t && split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o
+# RUN: llvm-profdata merge %t/a.proftext -o %t/a.profdata
+
+## Compression sort only: all non-cold functions should appear before all cold
+## ones, despite input order interleaving them.
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/compr.out %t/a.o --bp-compression-sort=function
+# RUN: llvm-nm --numeric-sort --format=just-symbols %t/compr.out | FileCheck %s --check-prefix=COMPRESSION
+
+# COMPRESSION: _main
+# COMPRESSION: _hot1
+# COMPRESSION: _hot2
+# COMPRESSION: _hot3
+# COMPRESSION: _cold1
+# COMPRESSION: _cold2
+# COMPRESSION: _cold3
+
+## Startup sort only: _hot1 and _cold1 are in the startup trace and get ordered
+## first. Non-startup non-cold sections keep input order, then non-startup cold
+## sections are pushed to the end.
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/startup-only.out %t/a.o --irpgo-profile=%t/a.profdata --bp-startup-sort=function --verbose-bp-section-orderer 2> %t/startup-only-verbose.txt
+# RUN: llvm-nm --numeric-sort --format=just-symbols %t/startup-only.out | FileCheck %s --check-prefix=STARTUP-ONLY
+# RUN: FileCheck %s --input-file %t/startup-only-verbose.txt --check-prefix=STARTUP-ONLY-VERBOSE
+
+# STARTUP-ONLY: _hot1
+# STARTUP-ONLY: _cold1
+# STARTUP-ONLY: _main
+# STARTUP-ONLY: _hot2
+# STARTUP-ONLY: _hot3
+# STARTUP-ONLY: _cold2
+# STARTUP-ONLY: _cold3
+# STARTUP-ONLY-VERBOSE: Functions for startup: 2
+# STARTUP-ONLY-VERBOSE: Functions for compression: 0
+# STARTUP-ONLY-VERBOSE: Cold functions for compression: 0
+
+## Startup sort + compression sort: startup functions first, then non-cold
+## functions, then cold functions.
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/startup-compr.out %t/a.o --irpgo-profile=%t/a.profdata --bp-startup-sort=function --bp-compression-sort=function --verbose-bp-section-orderer 2> %t/startup-compr-verbose.txt
+# RUN: llvm-nm --numeric-sort --format=just-symbols %t/startup-compr.out | FileCheck %s --check-prefix=STARTUP-COMPR
+# RUN: FileCheck %s --input-file %t/startup-compr-verbose.txt --check-prefix=STARTUP-COMPR-VERBOSE
+
+# STARTUP-COMPR: _hot1
+# STARTUP-COMPR: _cold1
+# STARTUP-COMPR: _main
+# STARTUP-COMPR: _hot2
+# STARTUP-COMPR: _hot3
+# STARTUP-COMPR: _cold2
+# STARTUP-COMPR: _cold3
+# STARTUP-COMPR-VERBOSE: Functions for startup: 2
+# STARTUP-COMPR-VERBOSE: Functions for compression: 3
+# STARTUP-COMPR-VERBOSE: Cold functions for compression: 2
+
+## Order file takes precedence over BP ordering. A cold function in the order
+## file appears at its ordered position, not in the cold region.
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/order.out %t/a.o --bp-compression-sort=function -order_file %t/a.orderfile
+# RUN: llvm-nm --numeric-sort --format=just-symbols %t/order.out | FileCheck %s --check-prefix=ORDERFILE
+
+# ORDERFILE: _cold2
+# ORDERFILE: _hot1
+
+#--- a.s
+.subsections_via_symbols
+.text
+
+.globl _main
+_main:
+ ret
+
+.globl _cold1
+.desc _cold1, 0x400
+_cold1:
+ add w0, w0, #10
+ add w1, w1, #11
+ bl _main
+ ret
+
+.globl _hot1
+_hot1:
+ add w0, w0, #1
+ add w1, w1, #2
+ bl _main
+ ret
+
+.globl _cold2
+.desc _cold2, 0x400
+_cold2:
+ add w0, w0, #20
+ add w1, w1, #21
+ bl _hot1
+ ret
+
+.globl _hot2
+_hot2:
+ add w0, w0, #2
+ add w1, w1, #3
+ bl _hot1
+ ret
+
+.globl _cold3
+.desc _cold3, 0x400
+_cold3:
+ add w0, w0, #30
+ add w1, w1, #31
+ bl _cold1
+ ret
+
+.globl _hot3
+_hot3:
+ add w0, w0, #3
+ add w1, w1, #4
+ bl _cold1
+ ret
+
+#--- a.proftext
+:ir
+:temporal_prof_traces
+# Num Traces
+1
+# Trace Stream Size:
+1
+# Weight
+1
+hot1, cold1
+
+hot1
+# Func Hash:
+1111
+# Num Counters:
+1
+# Counter Values:
+1
+
+cold1
+# Func Hash:
+2222
+# Num Counters:
+1
+# Counter Values:
+1
+
+#--- a.orderfile
+_cold2
+_hot1
>From f3b646b8a73f38ede7b3b96068b9dbc85b3a7f0f Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan94 at gmail.com>
Date: Wed, 4 Mar 2026 21:41:29 +0800
Subject: [PATCH 7/7] format
---
lld/MachO/Driver.cpp | 5 ++---
lld/MachO/Writer.cpp | 15 ++++++++-------
lld/include/lld/Common/BPSectionOrdererBase.inc | 12 ++++++------
3 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index c318b3abe94f8..97819f77620e3 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1712,9 +1712,8 @@ static void computeColdness() {
for (InputSection *isec : inputSections) {
if (!isCodeSection(isec))
continue;
- isec->isCold = llvm::any_of(isec->symbols, [](Defined *sym) {
- return sym->isCold();
- });
+ isec->isCold =
+ llvm::any_of(isec->symbols, [](Defined *sym) { return sym->isCold(); });
}
}
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 08ed56d6342ec..89b6d467d0d44 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -1008,14 +1008,15 @@ static void sortSegmentsAndSections() {
}
if (auto *merged = dyn_cast<ConcatOutputSection>(osec)) {
- auto coldIt = std::stable_partition(merged->inputs.begin(), merged->inputs.end(), [](InputSection *isec) {
- return !isec->isCold;
- });
+ auto coldIt = std::stable_partition(
+ merged->inputs.begin(), merged->inputs.end(),
+ [](InputSection *isec) { return !isec->isCold; });
if (!isecPriorities.empty()) {
- std::stable_sort(
- merged->inputs.begin(), coldIt, [&](InputSection *a, InputSection *b) {
- return isecPriorities.lookup(a) < isecPriorities.lookup(b);
- });
+ std::stable_sort(merged->inputs.begin(), coldIt,
+ [&](InputSection *a, InputSection *b) {
+ return isecPriorities.lookup(a) <
+ isecPriorities.lookup(b);
+ });
}
}
}
diff --git a/lld/include/lld/Common/BPSectionOrdererBase.inc b/lld/include/lld/Common/BPSectionOrdererBase.inc
index f9a40f033216e..d2fd03af2b9c7 100644
--- a/lld/include/lld/Common/BPSectionOrdererBase.inc
+++ b/lld/include/lld/Common/BPSectionOrdererBase.inc
@@ -381,10 +381,10 @@ auto BPOrderer<D>::computeOrder(
numDuplicateCodeSections + numColdCodeCompressionSections +
numDuplicateColdCodeSections + numDataCompressionSections +
numDuplicateDataSections;
- unsigned totalOrderedSize =
- startupSize + codeCompressionSize + duplicateCodeSize +
- coldCodeCompressionSize + duplicateColdCodeSize +
- dataCompressionSize + duplicateDataSize;
+ unsigned totalOrderedSize = startupSize + codeCompressionSize +
+ duplicateCodeSize + coldCodeCompressionSize +
+ duplicateColdCodeSize + dataCompressionSize +
+ duplicateDataSize;
dbgs() << "Ordered " << numTotalOrderedSections << " sections ("
<< totalOrderedSize << " bytes) using balanced partitioning:\n";
dbgs() << " Functions for startup: " << numStartupSections << " ("
@@ -394,8 +394,8 @@ auto BPOrderer<D>::computeOrder(
dbgs() << " Duplicate functions: " << numDuplicateCodeSections << " ("
<< duplicateCodeSize << " bytes)\n";
dbgs() << " Cold functions for compression: "
- << numColdCodeCompressionSections << " ("
- << coldCodeCompressionSize << " bytes)\n";
+ << numColdCodeCompressionSections << " (" << coldCodeCompressionSize
+ << " bytes)\n";
dbgs() << " Duplicate cold functions: " << numDuplicateColdCodeSections
<< " (" << duplicateColdCodeSize << " bytes)\n";
dbgs() << " Data for compression: " << numDataCompressionSections << " ("
More information about the llvm-commits mailing list