[lld] [lld][MachO] Add N_COLD_FUNC support (PR #183909)

Zhaoxuan Jiang via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 4 05:43:05 PST 2026


https://github.com/nocchijiang updated https://github.com/llvm/llvm-project/pull/183909

>From 5d3a9a812b6525e0869ca0a32c47a452f3506768 Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan94 at gmail.com>
Date: Sat, 28 Feb 2026 16:47:07 +0800
Subject: [PATCH 1/7] [lld][MachO] Add N_COLD_FUNC support

Parse N_COLD_FUNC from input object files and use it to move cold
functions to the end of __text, after all non-cold and ordered symbols.

Key behaviors:
- Cold functions are placed after all non-cold unordered symbols.
- Order file entries take precedence over the cold attribute: a cold
  function listed in the order file retains its specified position.
- The N_COLD_FUNC flag is stripped from the output symbol table, except
  for relocatable (MH_OBJECT) output, where it is preserved.
- ICF interaction: when folding identical cold and non-cold functions,
  the non-cold copy is kept as the master so the folded body remains
  in the hot region. InputSection::isCold() skips ICF-merged symbols
  to prevent a non-cold master from being misidentified as cold after
  absorbing cold duplicates via folding.
---
 lld/MachO/ICF.cpp               |  12 +++-
 lld/MachO/InputFiles.cpp        |  25 +++++---
 lld/MachO/InputSection.cpp      |  14 +++++
 lld/MachO/InputSection.h        |   1 +
 lld/MachO/SymbolTable.cpp       |   8 ++-
 lld/MachO/SymbolTable.h         |   3 +-
 lld/MachO/Symbols.cpp           |   4 +-
 lld/MachO/Symbols.h             |   5 +-
 lld/MachO/SyntheticSections.cpp |   2 +
 lld/MachO/Writer.cpp            |  24 +++++---
 lld/test/MachO/cold-func.s      | 103 ++++++++++++++++++++++++++++++++
 11 files changed, 176 insertions(+), 25 deletions(-)
 create mode 100644 lld/test/MachO/cold-func.s

diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index b03e2c5a42e00..913539e08469a 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -419,9 +419,17 @@ void ICF::run() {
         // When using safe_thunks, ensure that we first sort by icfEqClass and
         // then by keepUnique (descending). This guarantees that within an
         // equivalence class, the keepUnique inputs are always first.
-        if (config->icfLevel == ICFLevel::safe_thunks)
-          if (a->icfEqClass[0] == b->icfEqClass[0])
+        if (a->icfEqClass[0] == b->icfEqClass[0]) {
+          if (config->icfLevel == ICFLevel::safe_thunks &&
+              a->keepUnique != b->keepUnique)
             return a->keepUnique > b->keepUnique;
+          // Prefer non-cold sections as the master section to preserve locality
+          // for the non-cold paths.
+          bool aCold = a->isCold();
+          bool bCold = b->isCold();
+          if (aCold != bCold)
+            return !aCold;
+        }
         return a->icfEqClass[0] < b->icfEqClass[0];
       });
   forEachClass([&](size_t begin, size_t end) {
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index cc7eae51175bc..980ce6bbd4292 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -673,6 +673,8 @@ static macho::Symbol *createDefined(const NList &sym, StringRef name,
 
   assert(!(sym.n_desc & N_ARM_THUMB_DEF) && "ARM32 arch is not supported");
 
+  bool isCold = sym.n_desc & N_COLD_FUNC;
+
   if (sym.n_type & N_EXT) {
     // -load_hidden makes us treat global symbols as linkage unit scoped.
     // Duplicates are reported but the symbol does not go in the export trie.
@@ -716,13 +718,15 @@ static macho::Symbol *createDefined(const NList &sym, StringRef name,
     return symtab->addDefined(
         name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
         isPrivateExtern, sym.n_desc & REFERENCED_DYNAMICALLY,
-        sym.n_desc & N_NO_DEAD_STRIP, isWeakDefCanBeHidden);
+        sym.n_desc & N_NO_DEAD_STRIP, isWeakDefCanBeHidden, isCold);
   }
   bool includeInSymtab = !isPrivateLabel(name) && !isEhFrameSection(isec);
-  return make<Defined>(
+  auto *defined = make<Defined>(
       name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
       /*isExternal=*/false, /*isPrivateExtern=*/false, includeInSymtab,
       sym.n_desc & REFERENCED_DYNAMICALLY, sym.n_desc & N_NO_DEAD_STRIP);
+  defined->cold = isCold;
+  return defined;
 }
 
 // Absolute symbols are defined symbols that do not have an associated
@@ -730,6 +734,7 @@ static macho::Symbol *createDefined(const NList &sym, StringRef name,
 template <class NList>
 static macho::Symbol *createAbsolute(const NList &sym, InputFile *file,
                                      StringRef name, bool forceHidden) {
+  bool isCold = sym.n_desc & N_COLD_FUNC;
   assert(!(sym.n_desc & N_ARM_THUMB_DEF) && "ARM32 arch is not supported");
 
   if (sym.n_type & N_EXT) {
@@ -738,14 +743,16 @@ static macho::Symbol *createAbsolute(const NList &sym, InputFile *file,
                               /*isWeakDef=*/false, isPrivateExtern,
                               /*isReferencedDynamically=*/false,
                               sym.n_desc & N_NO_DEAD_STRIP,
-                              /*isWeakDefCanBeHidden=*/false);
+                              /*isWeakDefCanBeHidden=*/false, isCold);
   }
-  return make<Defined>(name, file, nullptr, sym.n_value, /*size=*/0,
-                       /*isWeakDef=*/false,
-                       /*isExternal=*/false, /*isPrivateExtern=*/false,
-                       /*includeInSymtab=*/true,
-                       /*isReferencedDynamically=*/false,
-                       sym.n_desc & N_NO_DEAD_STRIP);
+  auto *defined = make<Defined>(name, file, nullptr, sym.n_value, /*size=*/0,
+                                /*isWeakDef=*/false,
+                                /*isExternal=*/false, /*isPrivateExtern=*/false,
+                                /*includeInSymtab=*/true,
+                                /*isReferencedDynamically=*/false,
+                                sym.n_desc & N_NO_DEAD_STRIP);
+  defined->cold = isCold;
+  return defined;
 }
 
 template <class NList>
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 34847adc85954..50b0e512a64a3 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -195,6 +195,20 @@ const Relocation *InputSection::getRelocAt(uint32_t off) const {
   return &*it;
 }
 
+bool InputSection::isCold() const {
+  if (!isCodeSection(this))
+    return false;
+  for (const Defined *sym : symbols) {
+    // Skip symbols absorbed from ICF-folded sections so that a non-cold master
+    // is not misidentified as cold after folding a cold duplicate.
+    if (sym->identicalCodeFoldingKind != Symbol::ICFFoldKind::None)
+      continue;
+    if (sym->isCold())
+      return true;
+  }
+  return false;
+}
+
 void ConcatInputSection::foldIdentical(ConcatInputSection *copy,
                                        Symbol::ICFFoldKind foldKind) {
   align = std::max(align, copy->align);
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index e0a90a2edc0af..c77ea4b543f82 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -62,6 +62,7 @@ class InputSection {
   virtual void markLive(uint64_t off) = 0;
   virtual InputSection *canonical() { return this; }
   virtual const InputSection *canonical() const { return this; }
+  bool isCold() const;
 
 protected:
   InputSection(Kind kind, const Section &section, ArrayRef<uint8_t> data,
diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index 84d3e45d64396..6371214fe9c21 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -104,7 +104,7 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
                                  uint64_t size, bool isWeakDef,
                                  bool isPrivateExtern,
                                  bool isReferencedDynamically, bool noDeadStrip,
-                                 bool isWeakDefCanBeHidden) {
+                                 bool isWeakDefCanBeHidden, bool isCold) {
   bool overridesWeakDef = false;
   auto [s, wasInserted] = insert(name, file);
 
@@ -119,6 +119,7 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
           defined->weakDefCanBeHidden &= isWeakDefCanBeHidden;
           defined->referencedDynamically |= isReferencedDynamically;
           defined->noDeadStrip |= noDeadStrip;
+          defined->cold |= isCold;
         }
         if (auto concatIsec = dyn_cast_or_null<ConcatInputSection>(isec)) {
           concatIsec->wasCoalesced = true;
@@ -211,7 +212,8 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
   Defined *defined = replaceSymbol<Defined>(
       s, name, file, isec, value, size, isWeakDef, /*isExternal=*/true,
       isPrivateExtern, /*includeInSymtab=*/true, isReferencedDynamically,
-      noDeadStrip, overridesWeakDef, isWeakDefCanBeHidden, interposable);
+      noDeadStrip, overridesWeakDef, isWeakDefCanBeHidden, interposable,
+      isCold);
   return defined;
 }
 
@@ -221,7 +223,7 @@ Defined *SymbolTable::aliasDefined(Defined *src, StringRef target,
   return addDefined(target, newFile, src->isec(), src->value, src->size,
                     src->isWeakDef(), isPrivateExtern,
                     src->referencedDynamically, src->noDeadStrip,
-                    src->weakDefCanBeHidden);
+                    src->weakDefCanBeHidden, src->cold);
 }
 
 Symbol *SymbolTable::addUndefined(StringRef name, InputFile *file,
diff --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h
index e9b03da9d2549..fd87b7891a105 100644
--- a/lld/MachO/SymbolTable.h
+++ b/lld/MachO/SymbolTable.h
@@ -39,7 +39,8 @@ class SymbolTable {
   Defined *addDefined(StringRef name, InputFile *, InputSection *,
                       uint64_t value, uint64_t size, bool isWeakDef,
                       bool isPrivateExtern, bool isReferencedDynamically,
-                      bool noDeadStrip, bool isWeakDefCanBeHidden);
+                      bool noDeadStrip, bool isWeakDefCanBeHidden,
+                      bool isCold = false);
 
   Defined *aliasDefined(Defined *src, StringRef target, InputFile *newFile,
                         bool makePrivateExtern = false);
diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
index 9faf01e09de05..27419caf9de1e 100644
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -57,13 +57,13 @@ Defined::Defined(StringRef name, InputFile *file, InputSection *isec,
                  bool isPrivateExtern, bool includeInSymtab,
                  bool isReferencedDynamically, bool noDeadStrip,
                  bool canOverrideWeakDef, bool isWeakDefCanBeHidden,
-                 bool interposable)
+                 bool interposable, bool cold)
     : Symbol(DefinedKind, name, file), overridesWeakDef(canOverrideWeakDef),
       privateExtern(isPrivateExtern), includeInSymtab(includeInSymtab),
       identicalCodeFoldingKind(ICFFoldKind::None),
       referencedDynamically(isReferencedDynamically), noDeadStrip(noDeadStrip),
       interposable(interposable), weakDefCanBeHidden(isWeakDefCanBeHidden),
-      weakDef(isWeakDef), external(isExternal), originalIsec(isec),
+      cold(cold), weakDef(isWeakDef), external(isExternal), originalIsec(isec),
       value(value), size(size) {
   if (isec) {
     isec->symbols.push_back(this);
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index beb97b35bf881..a51b02ca12cac 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -118,7 +118,7 @@ class Defined : public Symbol {
           uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern,
           bool includeInSymtab, bool isReferencedDynamically, bool noDeadStrip,
           bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false,
-          bool interposable = false);
+          bool interposable = false, bool cold = false);
 
   bool isWeakDef() const override { return weakDef; }
   bool isExternalWeakDef() const {
@@ -128,6 +128,7 @@ class Defined : public Symbol {
 
   bool isExternal() const { return external; }
   bool isAbsolute() const { return originalIsec == nullptr; }
+  bool isCold() const { return cold; }
 
   uint64_t getVA() const override;
 
@@ -178,6 +179,8 @@ class Defined : public Symbol {
 
   bool weakDefCanBeHidden : 1;
 
+  bool cold : 1;
+
 private:
   const bool weakDef : 1;
   const bool external : 1;
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 36d15419a1091..5257433246d6b 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1445,6 +1445,8 @@ template <class LP> void SymtabSectionImpl<LP>::writeTo(uint8_t *buf) const {
       nList->n_desc |= defined->isExternalWeakDef() ? N_WEAK_DEF : 0;
       nList->n_desc |=
           defined->referencedDynamically ? REFERENCED_DYNAMICALLY : 0;
+      if (config->outputType == MH_OBJECT)
+        nList->n_desc |= defined->isCold() ? N_COLD_FUNC : 0;
     } else if (auto *dysym = dyn_cast<DylibSymbol>(entry.sym)) {
       uint16_t n_desc = nList->n_desc;
       int16_t ordinal = ordinalForDylibSymbol(*dysym);
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index f9fd12a13dba3..7b43d6de9f119 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -1007,13 +1007,23 @@ static void sortSegmentsAndSections() {
         osec->align = tlvAlign;
       }
 
-      if (!isecPriorities.empty()) {
-        if (auto *merged = dyn_cast<ConcatOutputSection>(osec)) {
-          llvm::stable_sort(
-              merged->inputs, [&](InputSection *a, InputSection *b) {
-                return isecPriorities.lookup(a) < isecPriorities.lookup(b);
-              });
-        }
+      if (auto *merged = dyn_cast<ConcatOutputSection>(osec)) {
+        llvm::stable_sort(merged->inputs,
+                          [&](InputSection *a, InputSection *b) {
+                            auto aIt = isecPriorities.find(a);
+                            auto bIt = isecPriorities.find(b);
+                            bool aHasPriority = aIt != isecPriorities.end();
+                            bool bHasPriority = bIt != isecPriorities.end();
+                            if (aHasPriority != bHasPriority)
+                              return aHasPriority;
+                            if (aHasPriority)
+                              return aIt->second < bIt->second;
+                            bool aCold = a->isCold();
+                            bool bCold = b->isCold();
+                            if (aCold != bCold)
+                              return !aCold;
+                            return false;
+                          });
       }
     }
   }
diff --git a/lld/test/MachO/cold-func.s b/lld/test/MachO/cold-func.s
new file mode 100644
index 0000000000000..27810bde33e2c
--- /dev/null
+++ b/lld/test/MachO/cold-func.s
@@ -0,0 +1,103 @@
+# REQUIRES: aarch64
+# RUN: rm -rf %t; split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/test.s -o %t.o
+# RUN: %lld -arch arm64 -lSystem %t.o -o %t.noorder.exe
+# RUN: llvm-objdump -d %t.noorder.exe | FileCheck %s --check-prefix=NOORDER
+# RUN: %lld -arch arm64 -lSystem %t.o -o %t.order-hot.exe -order_file %t/ord-hot
+# RUN: llvm-objdump -d %t.order-hot.exe | FileCheck %s --check-prefix=ORDER-HOT
+# RUN: %lld -arch arm64 -lSystem %t.o -o %t.order-cold.exe -order_file %t/ord-cold
+# RUN: llvm-objdump -d %t.order-cold.exe | FileCheck %s --check-prefix=ORDER-COLD
+# RUN: %lld -arch arm64 -lSystem %t.o -o %t.exe
+# RUN: llvm-objdump --syms %t.exe | FileCheck %s --check-prefix=EXE
+# RUN: %lld -arch arm64 -lSystem --icf=all %t.o -o %t.icf.exe -map %t/icf.map
+# RUN: FileCheck %s --input-file %t/icf.map --check-prefix=ICF
+# RUN: %lld -arch arm64 -lSystem --icf=safe_thunks %t.o -o %t.safe.exe -map %t/safe.map
+# RUN: FileCheck %s --input-file %t/safe.map --check-prefix=SAFE-THUNKS
+
+#--- test.s
+.subsections_via_symbols
+
+## Mark _cold and _normal as address-significant for safe_thunks testing.
+.addrsig
+.addrsig_sym _cold
+.addrsig_sym _normal
+
+.text
+
+.globl _cold
+.p2align 2
+.desc _cold, 0x400
+_cold:
+  add x0, x1, x2
+  add x3, x4, x5
+  ret
+
+.globl _normal
+.p2align 2
+_normal:
+  add x0, x1, x2
+  add x3, x4, x5
+  ret
+
+.globl _ordered
+.p2align 2
+_ordered:
+  add x0, x1, x2
+  add x3, x4, x5
+  ret
+
+.globl _main
+.p2align 2
+_main:
+  bl _normal
+  bl _cold
+  bl _ordered
+  ret
+
+## Basic N_COLD_FUNC support.
+# NOORDER: <_normal>:
+# NOORDER: <_ordered>:
+# NOORDER: <_main>:
+# NOORDER: <_cold>:
+
+## Ordered symbols should come before unordered cold symbols.
+# ORDER-HOT: <_ordered>:
+# ORDER-HOT: <_normal>:
+# ORDER-HOT: <_main>:
+# ORDER-HOT: <_cold>:
+
+## Cold attribute should not change the ordering of order-file symbols.
+# ORDER-COLD: <_cold>:
+# ORDER-COLD: <_ordered>:
+# ORDER-COLD: <_normal>:
+# ORDER-COLD: <_main>:
+
+## Check that N_COLD_FUNC is NOT preserved in the output executable.
+# EXE:      SYMBOL TABLE:
+# EXE-NOT:  0400 {{.*}} _cold
+# EXE:      {{.*}} g     F __TEXT,__text _cold
+
+## ICF + N_COLD_FUNC: _cold, _normal, and _ordered have identical bodies.
+## After folding, the non-cold copy should be the master so the folded body
+## is not the cold region (after _main).
+# ICF-LABEL: # Symbols:
+# ICF-DAG:   _normal
+# ICF-DAG:   _cold
+# ICF-DAG:   _ordered
+# ICF:       _main
+
+## With safe_thunks, _cold and _normal are keepUnique. The non-cold _normal
+## should be chosen as the master, appearing before _main. _cold gets a thunk
+## placed in the cold region.
+# SAFE-THUNKS-LABEL: # Symbols:
+# SAFE-THUNKS:       0x0000000C {{.*}} _normal
+# SAFE-THUNKS:       0x00000000 {{.*}} _ordered
+# SAFE-THUNKS:       0x00000010 {{.*}} _main
+# SAFE-THUNKS:       0x00000004 {{.*}} _cold
+
+#--- ord-hot
+_ordered
+
+#--- ord-cold
+_cold
+_ordered

>From 924b7bcb774f9c11fa78ece147edc95a4f03075d Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan at bytedance.com>
Date: Mon, 2 Mar 2026 15:23:33 +0800
Subject: [PATCH 2/7] fix symtab check

---
 lld/test/MachO/cold-func.s | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/lld/test/MachO/cold-func.s b/lld/test/MachO/cold-func.s
index 27810bde33e2c..ed8aacf6b3b02 100644
--- a/lld/test/MachO/cold-func.s
+++ b/lld/test/MachO/cold-func.s
@@ -8,7 +8,7 @@
 # RUN: %lld -arch arm64 -lSystem %t.o -o %t.order-cold.exe -order_file %t/ord-cold
 # RUN: llvm-objdump -d %t.order-cold.exe | FileCheck %s --check-prefix=ORDER-COLD
 # RUN: %lld -arch arm64 -lSystem %t.o -o %t.exe
-# RUN: llvm-objdump --syms %t.exe | FileCheck %s --check-prefix=EXE
+# RUN: llvm-nm -m %t.exe | FileCheck %s --check-prefix=EXE
 # RUN: %lld -arch arm64 -lSystem --icf=all %t.o -o %t.icf.exe -map %t/icf.map
 # RUN: FileCheck %s --input-file %t/icf.map --check-prefix=ICF
 # RUN: %lld -arch arm64 -lSystem --icf=safe_thunks %t.o -o %t.safe.exe -map %t/safe.map
@@ -73,9 +73,7 @@ _main:
 # ORDER-COLD: <_main>:
 
 ## Check that N_COLD_FUNC is NOT preserved in the output executable.
-# EXE:      SYMBOL TABLE:
-# EXE-NOT:  0400 {{.*}} _cold
-# EXE:      {{.*}} g     F __TEXT,__text _cold
+# EXE: (__TEXT,__text) external _cold
 
 ## ICF + N_COLD_FUNC: _cold, _normal, and _ordered have identical bodies.
 ## After folding, the non-cold copy should be the master so the folded body

>From ef811eebc79554a84ce839a62534cbc50c2ba95c Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan at bytedance.com>
Date: Mon, 2 Mar 2026 15:52:28 +0800
Subject: [PATCH 3/7] test weakdef + cold interaction

---
 lld/MachO/SymbolTable.cpp             |  2 ++
 lld/test/MachO/cold-func-weak-merge.s | 52 +++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)
 create mode 100644 lld/test/MachO/cold-func-weak-merge.s

diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index 6371214fe9c21..f70bb9732081a 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -119,6 +119,8 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
           defined->weakDefCanBeHidden &= isWeakDefCanBeHidden;
           defined->referencedDynamically |= isReferencedDynamically;
           defined->noDeadStrip |= noDeadStrip;
+          // If either weak definition is cold, the merged symbol is cold.
+          // This matches the behavior of both ld-prime and ld64.
           defined->cold |= isCold;
         }
         if (auto concatIsec = dyn_cast_or_null<ConcatInputSection>(isec)) {
diff --git a/lld/test/MachO/cold-func-weak-merge.s b/lld/test/MachO/cold-func-weak-merge.s
new file mode 100644
index 0000000000000..3c41e9cd6abd7
--- /dev/null
+++ b/lld/test/MachO/cold-func-weak-merge.s
@@ -0,0 +1,52 @@
+# REQUIRES: aarch64
+
+## Test that when two weak definitions of the same symbol exist and either one
+## is marked N_COLD_FUNC, the merged symbol is treated as cold. This matches the
+## behavior of both ld-prime and ld64.
+
+# RUN: rm -rf %t; split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/weak-cold.s -o %t/weak-cold.o
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/weak-noncold.s -o %t/weak-noncold.o
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/weak-main.s -o %t/weak-main.o
+
+## Link order: non-cold first, cold second. The non-cold def wins but cold |= true.
+# RUN: %lld -arch arm64 -lSystem %t/weak-noncold.o %t/weak-cold.o %t/weak-main.o -o %t/weak-nc-c.exe
+# RUN: llvm-objdump -d %t/weak-nc-c.exe | FileCheck %s
+
+## Link order: cold first, non-cold second. The cold def wins and cold |= false.
+# RUN: %lld -arch arm64 -lSystem %t/weak-cold.o %t/weak-noncold.o %t/weak-main.o -o %t/weak-c-nc.exe
+# RUN: llvm-objdump -d %t/weak-c-nc.exe | FileCheck %s
+
+## In both link orders, _weakfn ends up cold and is placed after _main.
+# CHECK: <_main>:
+# CHECK: <_weakfn>:
+
+#--- weak-cold.s
+.subsections_via_symbols
+.text
+.globl _weakfn
+.weak_definition _weakfn
+.p2align 2
+.desc _weakfn, 0x400
+_weakfn:
+  add x0, x1, x2
+  ret
+
+#--- weak-noncold.s
+.subsections_via_symbols
+.text
+.globl _weakfn
+.weak_definition _weakfn
+.p2align 2
+_weakfn:
+  add x0, x1, x2
+  ret
+
+#--- weak-main.s
+.subsections_via_symbols
+.text
+.globl _main
+.p2align 2
+_main:
+  bl _weakfn
+  ret

>From f7d6f714d476f0c4f37a3e177a617ec6875801a0 Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan at bytedance.com>
Date: Mon, 2 Mar 2026 20:50:36 +0800
Subject: [PATCH 4/7] add comment for Defined::cold

---
 lld/MachO/Symbols.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index a51b02ca12cac..9fc7d6b079058 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -179,6 +179,8 @@ class Defined : public Symbol {
 
   bool weakDefCanBeHidden : 1;
 
+  // Whether this symbol has the N_COLD_FUNC nlist flag set. Populated from the
+  // symbol table of input object files.
   bool cold : 1;
 
 private:

>From 51973a389c99c48ac835a1cb44139ed4c6f4cc4a Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan at bytedance.com>
Date: Wed, 4 Mar 2026 20:50:08 +0800
Subject: [PATCH 5/7] cache coldness and simplify ICF interaction

---
 lld/MachO/Driver.cpp            | 14 +++++++++
 lld/MachO/ICF.cpp               | 17 +++++------
 lld/MachO/InputSection.cpp      | 14 ---------
 lld/MachO/InputSection.h        | 16 ++++++++---
 lld/MachO/SectionPriorities.cpp |  5 +++-
 lld/MachO/Writer.cpp            | 25 ++++++-----------
 lld/test/MachO/cold-func.s      | 50 +++++++++++++++++++++++++--------
 7 files changed, 84 insertions(+), 57 deletions(-)

diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 58fbe64c2d1f9..c318b3abe94f8 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -31,6 +31,7 @@
 #include "lld/Common/Reproduce.h"
 #include "lld/Common/Version.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/MachO.h"
@@ -1706,6 +1707,17 @@ static SmallVector<StringRef, 0> getAllowableClients(opt::InputArgList &args) {
   return vals;
 }
 
+static void computeColdness() {
+  TimeTraceScope timeScope("Compute coldness");
+  for (InputSection *isec : inputSections) {
+    if (!isCodeSection(isec))
+      continue;
+    isec->isCold = llvm::any_of(isec->symbols, [](Defined *sym) {
+      return sym->isCold();
+    });
+  }
+}
+
 namespace lld {
 namespace macho {
 bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
@@ -2417,6 +2429,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
                      false))
       objc::mergeCategories();
 
+    computeColdness();
+
     // ICF assumes that all literals have been folded already, so we must run
     // foldIdenticalLiterals before foldIdenticalSections.
     foldIdenticalLiterals();
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index 913539e08469a..a02166d186b81 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -337,6 +337,8 @@ void ICF::applySafeThunksToRange(size_t begin, size_t end) {
 
     ConcatInputSection *thunk =
         makeSyntheticInputSection(isec->getSegName(), isec->getName());
+    // A thunk-folded cold function has a cold thunk.
+    thunk->isCold = isec->isCold;
     addInputSection(thunk);
 
     target->initICFSafeThunkBody(thunk, masterSym);
@@ -419,17 +421,9 @@ void ICF::run() {
         // When using safe_thunks, ensure that we first sort by icfEqClass and
         // then by keepUnique (descending). This guarantees that within an
         // equivalence class, the keepUnique inputs are always first.
-        if (a->icfEqClass[0] == b->icfEqClass[0]) {
-          if (config->icfLevel == ICFLevel::safe_thunks &&
-              a->keepUnique != b->keepUnique)
+        if (config->icfLevel == ICFLevel::safe_thunks)
+          if (a->icfEqClass[0] == b->icfEqClass[0])
             return a->keepUnique > b->keepUnique;
-          // Prefer non-cold sections as the master section to preserve locality
-          // for the non-cold paths.
-          bool aCold = a->isCold();
-          bool bCold = b->isCold();
-          if (aCold != bCold)
-            return !aCold;
-        }
         return a->icfEqClass[0] < b->icfEqClass[0];
       });
   forEachClass([&](size_t begin, size_t end) {
@@ -479,6 +473,9 @@ void ICF::run() {
         continue;
       }
       beginIsec->foldIdentical(icfInputs[i]);
+      // Make sure we don't fold hot code into cold regions.
+      if (!icfInputs[i]->isCold)
+        beginIsec->isCold = false;
     }
   });
 }
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 50b0e512a64a3..34847adc85954 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -195,20 +195,6 @@ const Relocation *InputSection::getRelocAt(uint32_t off) const {
   return &*it;
 }
 
-bool InputSection::isCold() const {
-  if (!isCodeSection(this))
-    return false;
-  for (const Defined *sym : symbols) {
-    // Skip symbols absorbed from ICF-folded sections so that a non-cold master
-    // is not misidentified as cold after folding a cold duplicate.
-    if (sym->identicalCodeFoldingKind != Symbol::ICFFoldKind::None)
-      continue;
-    if (sym->isCold())
-      return true;
-  }
-  return false;
-}
-
 void ConcatInputSection::foldIdentical(ConcatInputSection *copy,
                                        Symbol::ICFFoldKind foldKind) {
   align = std::max(align, copy->align);
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index c77ea4b543f82..494ddd40c34d3 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -62,17 +62,17 @@ class InputSection {
   virtual void markLive(uint64_t off) = 0;
   virtual InputSection *canonical() { return this; }
   virtual const InputSection *canonical() const { return this; }
-  bool isCold() const;
 
 protected:
   InputSection(Kind kind, const Section &section, ArrayRef<uint8_t> data,
                uint32_t align)
-      : sectionKind(kind), keepUnique(false), hasAltEntry(false), align(align),
-        data(data), section(section) {}
+      : sectionKind(kind), keepUnique(false), hasAltEntry(false), isCold(false),
+        align(align), data(data), section(section) {}
 
   InputSection(const InputSection &rhs)
       : sectionKind(rhs.sectionKind), keepUnique(false), hasAltEntry(false),
-        align(rhs.align), data(rhs.data), section(rhs.section) {}
+        isCold(rhs.isCold), align(rhs.align), data(rhs.data),
+        section(rhs.section) {}
 
   Kind sectionKind;
 
@@ -85,6 +85,14 @@ class InputSection {
   // Does this section have symbols at offsets other than zero? (NOTE: only
   // applies to ConcatInputSections.)
   bool hasAltEntry : 1;
+  // Is this considered cold? Computed before ICF. Currently reflects whether
+  // any symbol in the section has the N_COLD_FUNC nlist flag set. Cold
+  // sections are placed at the end of their containing output section to
+  // improve locality of non-cold input sections. When a section is given an
+  // explicit priority (via order file, --bp-startup-sort, or
+  // --bp-compression-sort), this flag is unset so that the priority-based
+  // ordering takes precedence over cold partitioning.
+  bool isCold : 1;
   uint32_t align = 1;
 
   OutputSection *parent = nullptr;
diff --git a/lld/MachO/SectionPriorities.cpp b/lld/MachO/SectionPriorities.cpp
index 818418e8aa29d..161724f097f59 100644
--- a/lld/MachO/SectionPriorities.cpp
+++ b/lld/MachO/SectionPriorities.cpp
@@ -399,8 +399,11 @@ macho::PriorityBuilder::buildInputSectionPriorities() {
     std::optional<int> symbolPriority = getSymbolPriority(sym);
     if (!symbolPriority)
       return;
-    int &priority = sectionPriorities[sym->isec()];
+    auto *isec = sym->isec();
+    int &priority = sectionPriorities[isec];
     priority = std::min(priority, *symbolPriority);
+    // Order file takes precedence over cold partitioning.
+    isec->isCold = false;
   };
 
   // TODO: Make sure this handles weak symbols correctly.
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 7b43d6de9f119..08ed56d6342ec 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -1008,22 +1008,15 @@ static void sortSegmentsAndSections() {
       }
 
       if (auto *merged = dyn_cast<ConcatOutputSection>(osec)) {
-        llvm::stable_sort(merged->inputs,
-                          [&](InputSection *a, InputSection *b) {
-                            auto aIt = isecPriorities.find(a);
-                            auto bIt = isecPriorities.find(b);
-                            bool aHasPriority = aIt != isecPriorities.end();
-                            bool bHasPriority = bIt != isecPriorities.end();
-                            if (aHasPriority != bHasPriority)
-                              return aHasPriority;
-                            if (aHasPriority)
-                              return aIt->second < bIt->second;
-                            bool aCold = a->isCold();
-                            bool bCold = b->isCold();
-                            if (aCold != bCold)
-                              return !aCold;
-                            return false;
-                          });
+        auto coldIt = std::stable_partition(merged->inputs.begin(), merged->inputs.end(), [](InputSection *isec) {
+          return !isec->isCold;
+        });
+        if (!isecPriorities.empty()) {
+          std::stable_sort(
+              merged->inputs.begin(), coldIt, [&](InputSection *a, InputSection *b) {
+                return isecPriorities.lookup(a) < isecPriorities.lookup(b);
+              });
+        }
       }
     }
   }
diff --git a/lld/test/MachO/cold-func.s b/lld/test/MachO/cold-func.s
index ed8aacf6b3b02..09344d5579503 100644
--- a/lld/test/MachO/cold-func.s
+++ b/lld/test/MachO/cold-func.s
@@ -17,9 +17,9 @@
 #--- test.s
 .subsections_via_symbols
 
-## Mark _cold and _normal as address-significant for safe_thunks testing.
 .addrsig
 .addrsig_sym _cold
+.addrsig_sym _cold_addrsig
 .addrsig_sym _normal
 
 .text
@@ -46,12 +46,29 @@ _ordered:
   add x3, x4, x5
   ret
 
+.globl _cold_addrsig
+.p2align 2
+.desc _cold_addrsig, 0x400
+_cold_addrsig:
+  add x0, x1, x2
+  add x3, x4, x5
+  ret
+
+.globl _cold_unordered
+.p2align 2
+.desc _cold_unordered, 0x400
+_cold_unordered:
+  add x0, x1, x3
+  ret
+
 .globl _main
 .p2align 2
 _main:
   bl _normal
   bl _cold
+  bl _cold_addrsig
   bl _ordered
+  bl _cold_unordered
   ret
 
 ## Basic N_COLD_FUNC support.
@@ -59,39 +76,48 @@ _main:
 # NOORDER: <_ordered>:
 # NOORDER: <_main>:
 # NOORDER: <_cold>:
+# NOORDER: <_cold_unordered>:
 
 ## Ordered symbols should come before unordered cold symbols.
 # ORDER-HOT: <_ordered>:
 # ORDER-HOT: <_normal>:
 # ORDER-HOT: <_main>:
 # ORDER-HOT: <_cold>:
+# ORDER-HOT: <_cold_unordered>:
 
 ## Cold attribute should not change the ordering of order-file symbols.
 # ORDER-COLD: <_cold>:
 # ORDER-COLD: <_ordered>:
 # ORDER-COLD: <_normal>:
 # ORDER-COLD: <_main>:
+# ORDER-COLD: <_cold_unordered>:
 
 ## Check that N_COLD_FUNC is NOT preserved in the output executable.
 # EXE: (__TEXT,__text) external _cold
 
 ## ICF + N_COLD_FUNC: _cold, _normal, and _ordered have identical bodies.
-## After folding, the non-cold copy should be the master so the folded body
-## is not the cold region (after _main).
+## _cold is the master; since _normal (non-cold) is folded into _cold,
+## _cold's isCold is unset and it stays in the hot region.
+## _cold_unordered has a different body and stays cold.
 # ICF-LABEL: # Symbols:
-# ICF-DAG:   _normal
-# ICF-DAG:   _cold
-# ICF-DAG:   _ordered
+# ICF:       _cold
+# ICF:       _normal
+# ICF:       _ordered
 # ICF:       _main
+# ICF:       _cold_unordered
 
-## With safe_thunks, _cold and _normal are keepUnique. The non-cold _normal
-## should be chosen as the master, appearing before _main. _cold gets a thunk
-## placed in the cold region.
+## With safe_thunks, _cold, _cold_addrsig, and _normal are keepUnique. _cold
+## appears first in input order so it becomes the master. Since _normal
+## (non-cold) is folded into _cold, _cold's isCold is unset. _normal gets a
+## non-cold thunk. _cold_addrsig gets a cold thunk. _cold_unordered stays cold
+## and anchors the cold region.
 # SAFE-THUNKS-LABEL: # Symbols:
-# SAFE-THUNKS:       0x0000000C {{.*}} _normal
+# SAFE-THUNKS:       0x0000000C {{.*}} _cold
 # SAFE-THUNKS:       0x00000000 {{.*}} _ordered
-# SAFE-THUNKS:       0x00000010 {{.*}} _main
-# SAFE-THUNKS:       0x00000004 {{.*}} _cold
+# SAFE-THUNKS:       {{.*}} _main
+# SAFE-THUNKS:       0x00000004 {{.*}} _normal
+# SAFE-THUNKS:       {{.*}} _cold_unordered
+# SAFE-THUNKS:       0x00000004 {{.*}} _cold_addrsig
 
 #--- ord-hot
 _ordered

>From 90a2a211e5ad5ad2e64806173cc7323d67a44658 Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan at bytedance.com>
Date: Wed, 4 Mar 2026 21:19:54 +0800
Subject: [PATCH 6/7] BP + N_COLD_FUNC interaction

---
 lld/ELF/BPSectionOrderer.cpp                  |   3 +
 lld/MachO/BPSectionOrderer.cpp                |  18 ++-
 .../lld/Common/BPSectionOrdererBase.inc       |  57 +++++--
 lld/test/MachO/bp-section-orderer-cold.s      | 145 ++++++++++++++++++
 4 files changed, 210 insertions(+), 13 deletions(-)
 create mode 100644 lld/test/MachO/bp-section-orderer-cold.s

diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
index 38f15b93050c9..ace6b42a7c47b 100644
--- a/lld/ELF/BPSectionOrderer.cpp
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -32,6 +32,9 @@ struct BPOrdererELF : lld::BPOrderer<BPOrdererELF> {
   static bool isCodeSection(const Section &sec) {
     return sec.flags & ELF::SHF_EXECINSTR;
   }
+  // ELF handles cold functions via separate output sections (.text.unlikely,
+  // .text.split), so no cold splitting is needed within BP.
+  static bool isColdSection(const Section &sec) { return false; }
   ArrayRef<Defined *> getSymbols(const Section &sec) {
     auto it = secToSym.find(&sec);
     if (it == secToSym.end())
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index c9c6c1c62bdf9..9700b4fedcad0 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -34,6 +34,7 @@ struct BPOrdererMachO : lld::BPOrderer<BPOrdererMachO> {
   static bool isCodeSection(const Section &sec) {
     return macho::isCodeSection(&sec);
   }
+  static bool isColdSection(const Section &sec) { return sec.isCold; }
   static ArrayRef<Defined *> getSymbols(const Section &sec) {
     return sec.symbols;
   }
@@ -140,8 +141,17 @@ DenseMap<const InputSection *, int> lld::macho::runBalancedPartitioning(
     }
   }
 
-  return BPOrdererMachO().computeOrder(profilePath, forFunctionCompression,
-                                       forDataCompression,
-                                       compressionSortStartupFunctions, verbose,
-                                       sections, rootSymbolToSectionIdxs);
+  auto result = BPOrdererMachO().computeOrder(
+      profilePath, forFunctionCompression, forDataCompression,
+      compressionSortStartupFunctions, verbose, sections,
+      rootSymbolToSectionIdxs);
+  // BP already orders cold sections after non-cold via separate buckets.
+  // Unset isCold on sections that received a BP priority so Writer.cpp's
+  // stable_partition doesn't re-partition them. Sections without a BP priority
+  // (e.g. non-startup cold sections when only --bp-startup-sort is used) keep
+  // their isCold flag for Writer.cpp to handle.
+  for (auto *isec : sections)
+    if (result.contains(isec))
+      isec->isCold = false;
+  return result;
 }
diff --git a/lld/include/lld/Common/BPSectionOrdererBase.inc b/lld/include/lld/Common/BPSectionOrdererBase.inc
index 7d13cd25c0e76..f9a40f033216e 100644
--- a/lld/include/lld/Common/BPSectionOrdererBase.inc
+++ b/lld/include/lld/Common/BPSectionOrdererBase.inc
@@ -222,14 +222,18 @@ auto BPOrderer<D>::computeOrder(
   }
 
   SmallVector<unsigned> sectionIdxsForFunctionCompression,
-      sectionIdxsForDataCompression;
+      sectionIdxsForColdFunctionCompression, sectionIdxsForDataCompression;
   for (unsigned sectionIdx = 0; sectionIdx < sections.size(); sectionIdx++) {
     if (startupSectionIdxUNs.contains(sectionIdx))
       continue;
     const auto *isec = sections[sectionIdx];
     if (D::isCodeSection(*isec)) {
-      if (forFunctionCompression)
-        sectionIdxsForFunctionCompression.push_back(sectionIdx);
+      if (forFunctionCompression) {
+        if (D::isColdSection(*isec))
+          sectionIdxsForColdFunctionCompression.push_back(sectionIdx);
+        else
+          sectionIdxsForFunctionCompression.push_back(sectionIdx);
+      }
     } else {
       if (forDataCompression)
         sectionIdxsForDataCompression.push_back(sectionIdx);
@@ -258,16 +262,21 @@ auto BPOrderer<D>::computeOrder(
   auto unsForFunctionCompression = getUnsForCompression<D>(
       sections, sectionToIdx, sectionIdxsForFunctionCompression,
       &duplicateSectionIdxs, maxUN);
+  auto unsForColdFunctionCompression = getUnsForCompression<D>(
+      sections, sectionToIdx, sectionIdxsForColdFunctionCompression,
+      &duplicateSectionIdxs, maxUN);
   auto unsForDataCompression = getUnsForCompression<D>(
       sections, sectionToIdx, sectionIdxsForDataCompression,
       &duplicateSectionIdxs, maxUN);
 
   std::vector<BPFunctionNode> nodesForStartup, nodesForFunctionCompression,
-      nodesForDataCompression;
+      nodesForColdFunctionCompression, nodesForDataCompression;
   for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
     nodesForStartup.emplace_back(sectionIdx, uns);
   for (auto &[sectionIdx, uns] : unsForFunctionCompression)
     nodesForFunctionCompression.emplace_back(sectionIdx, uns);
+  for (auto &[sectionIdx, uns] : unsForColdFunctionCompression)
+    nodesForColdFunctionCompression.emplace_back(sectionIdx, uns);
   for (auto &[sectionIdx, uns] : unsForDataCompression)
     nodesForDataCompression.emplace_back(sectionIdx, uns);
 
@@ -280,6 +289,8 @@ auto BPOrderer<D>::computeOrder(
   // input linker order tends to be not bad.
   llvm::sort(nodesForFunctionCompression,
              [](auto &L, auto &R) { return L.Id < R.Id; });
+  llvm::sort(nodesForColdFunctionCompression,
+             [](auto &L, auto &R) { return L.Id < R.Id; });
   llvm::sort(nodesForDataCompression,
              [](auto &L, auto &R) { return L.Id < R.Id; });
 
@@ -289,12 +300,15 @@ auto BPOrderer<D>::computeOrder(
     BalancedPartitioning bp(config);
     bp.run(nodesForStartup);
     bp.run(nodesForFunctionCompression);
+    bp.run(nodesForColdFunctionCompression);
     bp.run(nodesForDataCompression);
   }
 
   unsigned numStartupSections = 0, startupSize = 0;
   unsigned numCodeCompressionSections = 0, codeCompressionSize = 0;
   unsigned numDuplicateCodeSections = 0, duplicateCodeSize = 0;
+  unsigned numColdCodeCompressionSections = 0, coldCodeCompressionSize = 0;
+  unsigned numDuplicateColdCodeSections = 0, duplicateColdCodeSize = 0;
   unsigned numDataCompressionSections = 0, dataCompressionSize = 0;
   unsigned numDuplicateDataSections = 0, duplicateDataSize = 0;
   SetVector<const Section *> orderedSections;
@@ -306,7 +320,7 @@ auto BPOrderer<D>::computeOrder(
       ++numStartupSections;
     }
   }
-  // then functions for compression,
+  // then non-cold functions for compression,
   for (auto &node : nodesForFunctionCompression) {
     const auto *isec = sections[node.Id];
     if (orderedSections.insert(isec)) {
@@ -324,6 +338,24 @@ auto BPOrderer<D>::computeOrder(
       }
     }
   }
+  // then cold functions for compression,
+  for (auto &node : nodesForColdFunctionCompression) {
+    const auto *isec = sections[node.Id];
+    if (orderedSections.insert(isec)) {
+      coldCodeCompressionSize += D::getSize(*isec);
+      ++numColdCodeCompressionSections;
+    }
+    auto It = duplicateSectionIdxs.find(node.Id);
+    if (It == duplicateSectionIdxs.end())
+      continue;
+    for (auto dupSecIdx : It->getSecond()) {
+      const auto *dupIsec = sections[dupSecIdx];
+      if (orderedSections.insert(dupIsec)) {
+        duplicateColdCodeSize += D::getSize(*dupIsec);
+        ++numDuplicateColdCodeSections;
+      }
+    }
+  }
   // then data for compression.
   for (auto &node : nodesForDataCompression) {
     const auto *isec = sections[node.Id];
@@ -346,11 +378,13 @@ auto BPOrderer<D>::computeOrder(
   if (verbose) {
     unsigned numTotalOrderedSections =
         numStartupSections + numCodeCompressionSections +
-        numDuplicateCodeSections + numDataCompressionSections +
+        numDuplicateCodeSections + numColdCodeCompressionSections +
+        numDuplicateColdCodeSections + numDataCompressionSections +
         numDuplicateDataSections;
-    unsigned totalOrderedSize = startupSize + codeCompressionSize +
-                                duplicateCodeSize + dataCompressionSize +
-                                duplicateDataSize;
+    unsigned totalOrderedSize =
+        startupSize + codeCompressionSize + duplicateCodeSize +
+        coldCodeCompressionSize + duplicateColdCodeSize +
+        dataCompressionSize + duplicateDataSize;
     dbgs() << "Ordered " << numTotalOrderedSections << " sections ("
            << totalOrderedSize << " bytes) using balanced partitioning:\n";
     dbgs() << "  Functions for startup: " << numStartupSections << " ("
@@ -359,6 +393,11 @@ auto BPOrderer<D>::computeOrder(
            << " (" << codeCompressionSize << " bytes)\n";
     dbgs() << "  Duplicate functions: " << numDuplicateCodeSections << " ("
            << duplicateCodeSize << " bytes)\n";
+    dbgs() << "  Cold functions for compression: "
+           << numColdCodeCompressionSections << " ("
+           << coldCodeCompressionSize << " bytes)\n";
+    dbgs() << "  Duplicate cold functions: " << numDuplicateColdCodeSections
+           << " (" << duplicateColdCodeSize << " bytes)\n";
     dbgs() << "  Data for compression: " << numDataCompressionSections << " ("
            << dataCompressionSize << " bytes)\n";
     dbgs() << "  Duplicate data: " << numDuplicateDataSections << " ("
diff --git a/lld/test/MachO/bp-section-orderer-cold.s b/lld/test/MachO/bp-section-orderer-cold.s
new file mode 100644
index 0000000000000..8fcc6625f1af2
--- /dev/null
+++ b/lld/test/MachO/bp-section-orderer-cold.s
@@ -0,0 +1,145 @@
+# REQUIRES: aarch64
+
+# RUN: rm -rf %t && split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o
+# RUN: llvm-profdata merge %t/a.proftext -o %t/a.profdata
+
+## Compression sort only: all non-cold functions should appear before all cold
+## ones, despite input order interleaving them.
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/compr.out %t/a.o --bp-compression-sort=function
+# RUN: llvm-nm --numeric-sort --format=just-symbols %t/compr.out | FileCheck %s --check-prefix=COMPRESSION
+
+# COMPRESSION:         _main
+# COMPRESSION:         _hot1
+# COMPRESSION:         _hot2
+# COMPRESSION:         _hot3
+# COMPRESSION:         _cold1
+# COMPRESSION:         _cold2
+# COMPRESSION:         _cold3
+
+## Startup sort only: _hot1 and _cold1 are in the startup trace and get ordered
+## first. Non-startup non-cold sections keep input order, then non-startup cold
+## sections are pushed to the end.
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/startup-only.out %t/a.o --irpgo-profile=%t/a.profdata --bp-startup-sort=function --verbose-bp-section-orderer 2> %t/startup-only-verbose.txt
+# RUN: llvm-nm --numeric-sort --format=just-symbols %t/startup-only.out | FileCheck %s --check-prefix=STARTUP-ONLY
+# RUN: FileCheck %s --input-file %t/startup-only-verbose.txt --check-prefix=STARTUP-ONLY-VERBOSE
+
+# STARTUP-ONLY:         _hot1
+# STARTUP-ONLY:         _cold1
+# STARTUP-ONLY:         _main
+# STARTUP-ONLY:         _hot2
+# STARTUP-ONLY:         _hot3
+# STARTUP-ONLY:         _cold2
+# STARTUP-ONLY:         _cold3
+# STARTUP-ONLY-VERBOSE: Functions for startup: 2
+# STARTUP-ONLY-VERBOSE: Functions for compression: 0
+# STARTUP-ONLY-VERBOSE: Cold functions for compression: 0
+
+## Startup sort + compression sort: startup functions first, then non-cold
+## functions, then cold functions.
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/startup-compr.out %t/a.o --irpgo-profile=%t/a.profdata --bp-startup-sort=function --bp-compression-sort=function --verbose-bp-section-orderer 2> %t/startup-compr-verbose.txt
+# RUN: llvm-nm --numeric-sort --format=just-symbols %t/startup-compr.out | FileCheck %s --check-prefix=STARTUP-COMPR
+# RUN: FileCheck %s --input-file %t/startup-compr-verbose.txt --check-prefix=STARTUP-COMPR-VERBOSE
+
+# STARTUP-COMPR:         _hot1
+# STARTUP-COMPR:         _cold1
+# STARTUP-COMPR:         _main
+# STARTUP-COMPR:         _hot2
+# STARTUP-COMPR:         _hot3
+# STARTUP-COMPR:         _cold2
+# STARTUP-COMPR:         _cold3
+# STARTUP-COMPR-VERBOSE: Functions for startup: 2
+# STARTUP-COMPR-VERBOSE: Functions for compression: 3
+# STARTUP-COMPR-VERBOSE: Cold functions for compression: 2
+
+## Order file takes precedence over BP ordering. A cold function in the order
+## file appears at its ordered position, not in the cold region.
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/order.out %t/a.o --bp-compression-sort=function -order_file %t/a.orderfile
+# RUN: llvm-nm --numeric-sort --format=just-symbols %t/order.out | FileCheck %s --check-prefix=ORDERFILE
+
+# ORDERFILE:     _cold2
+# ORDERFILE:     _hot1
+
+#--- a.s
+.subsections_via_symbols
+.text
+
+.globl _main
+_main:
+  ret
+
+.globl _cold1
+.desc _cold1, 0x400
+_cold1:
+  add w0, w0, #10
+  add w1, w1, #11
+  bl _main
+  ret
+
+.globl _hot1
+_hot1:
+  add w0, w0, #1
+  add w1, w1, #2
+  bl _main
+  ret
+
+.globl _cold2
+.desc _cold2, 0x400
+_cold2:
+  add w0, w0, #20
+  add w1, w1, #21
+  bl _hot1
+  ret
+
+.globl _hot2
+_hot2:
+  add w0, w0, #2
+  add w1, w1, #3
+  bl _hot1
+  ret
+
+.globl _cold3
+.desc _cold3, 0x400
+_cold3:
+  add w0, w0, #30
+  add w1, w1, #31
+  bl _cold1
+  ret
+
+.globl _hot3
+_hot3:
+  add w0, w0, #3
+  add w1, w1, #4
+  bl _cold1
+  ret
+
+#--- a.proftext
+:ir
+:temporal_prof_traces
+# Num Traces
+1
+# Trace Stream Size:
+1
+# Weight
+1
+hot1, cold1
+
+hot1
+# Func Hash:
+1111
+# Num Counters:
+1
+# Counter Values:
+1
+
+cold1
+# Func Hash:
+2222
+# Num Counters:
+1
+# Counter Values:
+1
+
+#--- a.orderfile
+_cold2
+_hot1

>From f3b646b8a73f38ede7b3b96068b9dbc85b3a7f0f Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan94 at gmail.com>
Date: Wed, 4 Mar 2026 21:41:29 +0800
Subject: [PATCH 7/7] format

---
 lld/MachO/Driver.cpp                            |  5 ++---
 lld/MachO/Writer.cpp                            | 15 ++++++++-------
 lld/include/lld/Common/BPSectionOrdererBase.inc | 12 ++++++------
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index c318b3abe94f8..97819f77620e3 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1712,9 +1712,8 @@ static void computeColdness() {
   for (InputSection *isec : inputSections) {
     if (!isCodeSection(isec))
       continue;
-    isec->isCold = llvm::any_of(isec->symbols, [](Defined *sym) {
-      return sym->isCold();
-    });
+    isec->isCold =
+        llvm::any_of(isec->symbols, [](Defined *sym) { return sym->isCold(); });
   }
 }
 
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 08ed56d6342ec..89b6d467d0d44 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -1008,14 +1008,15 @@ static void sortSegmentsAndSections() {
       }
 
       if (auto *merged = dyn_cast<ConcatOutputSection>(osec)) {
-        auto coldIt = std::stable_partition(merged->inputs.begin(), merged->inputs.end(), [](InputSection *isec) {
-          return !isec->isCold;
-        });
+        auto coldIt = std::stable_partition(
+            merged->inputs.begin(), merged->inputs.end(),
+            [](InputSection *isec) { return !isec->isCold; });
         if (!isecPriorities.empty()) {
-          std::stable_sort(
-              merged->inputs.begin(), coldIt, [&](InputSection *a, InputSection *b) {
-                return isecPriorities.lookup(a) < isecPriorities.lookup(b);
-              });
+          std::stable_sort(merged->inputs.begin(), coldIt,
+                           [&](InputSection *a, InputSection *b) {
+                             return isecPriorities.lookup(a) <
+                                    isecPriorities.lookup(b);
+                           });
         }
       }
     }
diff --git a/lld/include/lld/Common/BPSectionOrdererBase.inc b/lld/include/lld/Common/BPSectionOrdererBase.inc
index f9a40f033216e..d2fd03af2b9c7 100644
--- a/lld/include/lld/Common/BPSectionOrdererBase.inc
+++ b/lld/include/lld/Common/BPSectionOrdererBase.inc
@@ -381,10 +381,10 @@ auto BPOrderer<D>::computeOrder(
         numDuplicateCodeSections + numColdCodeCompressionSections +
         numDuplicateColdCodeSections + numDataCompressionSections +
         numDuplicateDataSections;
-    unsigned totalOrderedSize =
-        startupSize + codeCompressionSize + duplicateCodeSize +
-        coldCodeCompressionSize + duplicateColdCodeSize +
-        dataCompressionSize + duplicateDataSize;
+    unsigned totalOrderedSize = startupSize + codeCompressionSize +
+                                duplicateCodeSize + coldCodeCompressionSize +
+                                duplicateColdCodeSize + dataCompressionSize +
+                                duplicateDataSize;
     dbgs() << "Ordered " << numTotalOrderedSections << " sections ("
            << totalOrderedSize << " bytes) using balanced partitioning:\n";
     dbgs() << "  Functions for startup: " << numStartupSections << " ("
@@ -394,8 +394,8 @@ auto BPOrderer<D>::computeOrder(
     dbgs() << "  Duplicate functions: " << numDuplicateCodeSections << " ("
            << duplicateCodeSize << " bytes)\n";
     dbgs() << "  Cold functions for compression: "
-           << numColdCodeCompressionSections << " ("
-           << coldCodeCompressionSize << " bytes)\n";
+           << numColdCodeCompressionSections << " (" << coldCodeCompressionSize
+           << " bytes)\n";
     dbgs() << "  Duplicate cold functions: " << numDuplicateColdCodeSections
            << " (" << duplicateColdCodeSize << " bytes)\n";
     dbgs() << "  Data for compression: " << numDataCompressionSections << " ("



More information about the llvm-commits mailing list