[lld] 2324c2e - [LLD] Two tweaks to symbol ordering scheme

Shoaib Meenai via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 12 11:36:34 PDT 2022


Author: YongKang Zhu
Date: 2022-07-12T11:34:17-07:00
New Revision: 2324c2e3c3110cd077bb537eaa5b686a6b97c8e7

URL: https://github.com/llvm/llvm-project/commit/2324c2e3c3110cd077bb537eaa5b686a6b97c8e7
DIFF: https://github.com/llvm/llvm-project/commit/2324c2e3c3110cd077bb537eaa5b686a6b97c8e7.diff

LOG: [LLD] Two tweaks to symbol ordering scheme

When `--symbol-ordering-file` is specified, the linker today always places
hot contributions in the middle of cold ones when targeting a RISC machine, so
as to minimize the chance that branch thunks need to be generated for hot code
calling into cold code. This is not necessary when the user specifies an
ordering of read-only data (vs. function) symbols, or when the output section
is small enough that no branch thunk would ever be required. The latter is
common for mobile apps. For example, among all the native ARM64 libraries in
the Facebook Instagram App for Android, 80% have a text section smaller than
64KB and the largest text section seen is less than 8MB, well below the
distance that a BRANCH26 can reach.

Reviewed By: MaskRay

Differential Revision: https://reviews.llvm.org/D128382

Added: 
    

Modified: 
    lld/ELF/Writer.cpp
    lld/test/ELF/arm-symbol-ordering-file.s

Removed: 
    


################################################################################
diff  --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index a398a7f952e91..705cc7bf97661 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1312,12 +1312,16 @@ static DenseMap<const InputSectionBase *, int> buildSectionOrder() {
 // Sorts the sections in ISD according to the provided section order.
 static void
 sortISDBySectionOrder(InputSectionDescription *isd,
-                      const DenseMap<const InputSectionBase *, int> &order) {
+                      const DenseMap<const InputSectionBase *, int> &order,
+                      bool executableOutputSection) {
   SmallVector<InputSection *, 0> unorderedSections;
   SmallVector<std::pair<InputSection *, int>, 0> orderedSections;
   uint64_t unorderedSize = 0;
+  uint64_t totalSize = 0;
 
   for (InputSection *isec : isd->sections) {
+    if (executableOutputSection)
+      totalSize += isec->getSize();
     auto i = order.find(isec);
     if (i == order.end()) {
       unorderedSections.push_back(isec);
@@ -1355,8 +1359,15 @@ sortISDBySectionOrder(InputSectionDescription *isd,
   // of the second block of cold code can call the hot code without a thunk. So
   // we effectively double the amount of code that could potentially call into
   // the hot code without a thunk.
+  //
+  // The above is not necessary if total size of input sections in this "isd"
+  // is small. Note that we assume all input sections are executable if the
+  // output section is executable (which is not always true but supposed to
+  // cover most cases).
   size_t insPt = 0;
-  if (target->getThunkSectionSpacing() && !orderedSections.empty()) {
+  if (executableOutputSection && !orderedSections.empty() &&
+      target->getThunkSectionSpacing() &&
+      totalSize >= target->getThunkSectionSpacing()) {
     uint64_t unorderedPos = 0;
     for (; insPt != unorderedSections.size(); ++insPt) {
       unorderedPos += unorderedSections[insPt]->getSize();
@@ -1397,7 +1408,7 @@ static void sortSection(OutputSection &osec,
   if (!order.empty())
     for (SectionCommand *b : osec.commands)
       if (auto *isd = dyn_cast<InputSectionDescription>(b))
-        sortISDBySectionOrder(isd, order);
+        sortISDBySectionOrder(isd, order, osec.flags & SHF_EXECINSTR);
 
   if (script->hasSectionsCommand)
     return;

diff  --git a/lld/test/ELF/arm-symbol-ordering-file.s b/lld/test/ELF/arm-symbol-ordering-file.s
index fe3de0d9d013a..c503bb2d856d0 100644
--- a/lld/test/ELF/arm-symbol-ordering-file.s
+++ b/lld/test/ELF/arm-symbol-ordering-file.s
@@ -1,16 +1,34 @@
 # REQUIRES: arm
-# RUN: llvm-mc -filetype=obj -triple=armv7-unknown-linux %s -o %t.o
+
+# RUN: split-file %s %t
+
+# RUN: llvm-mc -filetype=obj -triple=armv7-unknown-linux %t/small.s -o %t.small.o
+# RUN: llvm-mc -filetype=obj -triple=armv7-unknown-linux %t/large.s -o %t.large.o
+# RUN: llvm-objcopy --set-section-flags .bar=alloc,readonly %t.large.o %t.large.RO.o
 
 # RUN: echo ordered > %t_order.txt
-# RUN: ld.lld --symbol-ordering-file %t_order.txt %t.o -o %t2.out
-# RUN: llvm-nm -n %t2.out | FileCheck %s
 
-# CHECK: unordered1
-# CHECK-NEXT: unordered2
-# CHECK-NEXT: unordered3
-# CHECK-NEXT: ordered
-# CHECK-NEXT: unordered4
+# RUN: ld.lld --symbol-ordering-file %t_order.txt %t.small.o -o %t2.small.out
+# RUN: ld.lld --symbol-ordering-file %t_order.txt %t.large.o -o %t2.large.out
+# RUN: ld.lld --symbol-ordering-file %t_order.txt %t.large.RO.o -o %t2.large.RO.out
+# RUN: llvm-nm -n %t2.small.out | FileCheck --check-prefix=SMALL %s
+# RUN: llvm-nm -n %t2.large.out | FileCheck --check-prefix=LARGE %s
+# RUN: llvm-nm -n %t2.large.RO.out | FileCheck --check-prefix=SMALL %s
+# RUN: rm -f %t.*.o %t2.*.out
+
+# SMALL: ordered
+# SMALL-NEXT: unordered1
+# SMALL-NEXT: unordered2
+# SMALL-NEXT: unordered3
+# SMALL-NEXT: unordered4
+
+# LARGE: unordered1
+# LARGE-NEXT: unordered2
+# LARGE-NEXT: ordered
+# LARGE-NEXT: unordered3
+# LARGE-NEXT: unordered4
 
+#--- small.s
 .section .foo,"ax",%progbits,unique,1
 unordered1:
 .zero 1
@@ -30,3 +48,24 @@ unordered4:
 .section .foo,"ax",%progbits,unique,5
 ordered:
 .zero 1
+
+#--- large.s
+.section .bar,"ax",%progbits,unique,1
+unordered1:
+.zero 0xC00000
+
+.section .bar,"ax",%progbits,unique,2
+unordered2:
+.zero 0xC00000
+
+.section .bar,"ax",%progbits,unique,3
+unordered3:
+.zero 0xC00000
+
+.section .bar,"ax",%progbits,unique,4
+unordered4:
+.zero 0xC00000
+
+.section .bar,"ax",%progbits,unique,5
+ordered:
+.zero 8


        


More information about the llvm-commits mailing list