[llvm] [BOLT] Use compact EH format for fixed-address executables (PR #117274)

Maksim Panchenko via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 21 18:21:54 PST 2024


https://github.com/maksfb created https://github.com/llvm/llvm-project/pull/117274

Use ULEB128 format for emitting LSDAs for fixed-address executables, similar to what we use for PIEs/DSOs. Main difference is that we don't use landing pad trampolines when landing pads are not contained in a single fragment. Instead, we fallback to emitting larger fixed-address LSDAs, which is still better than adding trampoline instructions.

>From 9c9e18bec60ccfc50c13154e46d043c9b49a70ed Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Wed, 20 Nov 2024 21:43:35 -0800
Subject: [PATCH] [BOLT] Use compact EH format for fixed-address executables

Use ULEB128 format for emitting LSDAs for fixed-address executables,
similar to what we use for PIEs/DSOs. Main difference is that we don't
use landing pad trampolines when landing pads are not contained in a
single fragment. Instead, we fallback to emitting larger fixed-address
LSDAs, which is still better than adding trampoline instructions.
---
 bolt/lib/Core/BinaryEmitter.cpp    | 23 ++++++---
 bolt/lib/Passes/SplitFunctions.cpp | 10 ++--
 bolt/test/X86/exceptions-compact.s | 75 ++++++++++++++++++++++++++++++
 3 files changed, 98 insertions(+), 10 deletions(-)
 create mode 100644 bolt/test/X86/exceptions-compact.s

diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp
index b11ae68b3fea19..f34a94c5779213 100644
--- a/bolt/lib/Core/BinaryEmitter.cpp
+++ b/bolt/lib/Core/BinaryEmitter.cpp
@@ -923,9 +923,21 @@ void BinaryEmitter::emitLSDA(BinaryFunction &BF, const FunctionFragment &FF) {
   // As a solution, for fixed-address binaries we set LPStart to 0, and for
   // position-independent binaries we offset LP start by one byte.
   bool NeedsLPAdjustment = false;
-  const MCSymbol *LPStartSymbol = nullptr;
   std::function<void(const MCSymbol *)> emitLandingPad;
-  if (BC.HasFixedLoadAddress) {
+
+  // Check if there's a symbol associated with a landing pad fragment.
+  const MCSymbol *LPStartSymbol = BF.getLPStartSymbol(FF.getFragmentNum());
+  if (!LPStartSymbol) {
+    // Since landing pads are not in the same fragment, we fall back to emitting
+    // absolute addresses for this FDE.
+    if (opts::Verbosity >= 2) {
+      BC.outs() << "BOLT-INFO: falling back to generating absolute-address "
+                << "exception ranges for " << BF << '\n';
+    }
+
+    assert(BC.HasFixedLoadAddress &&
+           "Cannot emit absolute-address landing pads for PIE/DSO");
+
     Streamer.emitIntValue(dwarf::DW_EH_PE_udata4, 1); // LPStart format
     Streamer.emitIntValue(0, 4);                      // LPStart
     emitLandingPad = [&](const MCSymbol *LPSymbol) {
@@ -936,9 +948,6 @@ void BinaryEmitter::emitLSDA(BinaryFunction &BF, const FunctionFragment &FF) {
     };
   } else {
     std::optional<FragmentNum> LPFN = BF.getLPFragment(FF.getFragmentNum());
-    LPStartSymbol = BF.getLPStartSymbol(FF.getFragmentNum());
-    assert(LPFN && LPStartSymbol && "Expected LPStart symbol to be set");
-
     const FunctionFragment &LPFragment = BF.getLayout().getFragment(*LPFN);
     NeedsLPAdjustment =
         (!LPFragment.empty() && LPFragment.front()->isLandingPad());
@@ -987,7 +996,7 @@ void BinaryEmitter::emitLSDA(BinaryFunction &BF, const FunctionFragment &FF) {
 
   // Emit encoding of entries in the call site table. The format is used for the
   // call site start, length, and corresponding landing pad.
-  if (BC.HasFixedLoadAddress)
+  if (!LPStartSymbol)
     Streamer.emitIntValue(dwarf::DW_EH_PE_sdata4, 1);
   else
     Streamer.emitIntValue(dwarf::DW_EH_PE_uleb128, 1);
@@ -1007,7 +1016,7 @@ void BinaryEmitter::emitLSDA(BinaryFunction &BF, const FunctionFragment &FF) {
 
     // Start of the range is emitted relative to the start of current
     // function split part.
-    if (BC.HasFixedLoadAddress) {
+    if (!LPStartSymbol) {
       Streamer.emitAbsoluteSymbolDiff(BeginLabel, StartSymbol, 4);
       Streamer.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 4);
     } else {
diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp
index 3c527038b5e36c..b21401e069bfa6 100644
--- a/bolt/lib/Passes/SplitFunctions.cpp
+++ b/bolt/lib/Passes/SplitFunctions.cpp
@@ -901,7 +901,7 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
   // have to be placed in the same fragment. When we split them, create
   // trampoline landing pads that will redirect the execution to real LPs.
   TrampolineSetType Trampolines;
-  if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit()) {
+  if (BF.hasEHRanges() && BF.isSplit()) {
     // If all landing pads for this fragment are grouped in one (potentially
     // different) fragment, we can set LPStart to the start of that fragment
     // and avoid trampoline code.
@@ -925,8 +925,12 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
       } else if (LandingPadFragments.size() == 1) {
         BF.setLPFragment(FF.getFragmentNum(), LandingPadFragments.front());
       } else {
-        NeedsTrampolines = true;
-        break;
+        if (!BC.HasFixedLoadAddress) {
+          NeedsTrampolines = true;
+          break;
+        } else {
+          BF.setLPFragment(FF.getFragmentNum(), std::nullopt);
+        }
       }
     }
 
diff --git a/bolt/test/X86/exceptions-compact.s b/bolt/test/X86/exceptions-compact.s
new file mode 100644
index 00000000000000..2a9e2a21c3d121
--- /dev/null
+++ b/bolt/test/X86/exceptions-compact.s
@@ -0,0 +1,75 @@
+## Check that llvm-bolt is able to overwrite LSDA in ULEB128 format in-place for
+## all types of binaries.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t.o
+# RUN: ld.lld --no-pie %t.o -o %t.exe -q
+# RUN: ld.lld --pie %t.o -o %t.pie -q
+# RUN: ld.lld --shared %t.o -o %t.so -q
+# RUN: llvm-bolt %t.exe -o %t.bolt --strict \
+# RUN:   | FileCheck --check-prefix=CHECK-BOLT %s
+# RUN: llvm-bolt %t.pie -o %t.pie.bolt --strict \
+# RUN:   | FileCheck --check-prefix=CHECK-BOLT %s
+# RUN: llvm-bolt %t.so -o %t.so.bolt --strict \
+# RUN:   | FileCheck --check-prefix=CHECK-BOLT %s
+
+# CHECK-BOLT: rewriting .gcc_except_table in-place
+
+# RUN: llvm-readelf -WS %t.bolt | FileCheck --check-prefix=CHECK-ELF %s
+# RUN: llvm-readelf -WS %t.pie.bolt | FileCheck --check-prefix=CHECK-ELF %s
+# RUN: llvm-readelf -WS %t.so.bolt | FileCheck --check-prefix=CHECK-ELF %s
+
+# CHECK-ELF-NOT: .bolt.org.gcc_except_table
+
+  .text
+  .global foo
+  .type foo, %function
+foo:
+  .cfi_startproc
+  ret
+  .cfi_endproc
+  .size foo, .-foo
+
+  .globl _start
+  .type _start, %function
+_start:
+.Lfunc_begin0:
+  .cfi_startproc
+  .cfi_lsda 27, .Lexception0
+  call foo
+.Ltmp0:
+  call foo
+.Ltmp1:
+  ret
+
+## Landing pads.
+.LLP1:
+  ret
+.LLP0:
+  ret
+
+  .cfi_endproc
+.Lfunc_end0:
+  .size _start, .-_start
+
+## EH table.
+  .section  .gcc_except_table,"a", at progbits
+  .p2align  2
+GCC_except_table0:
+.Lexception0:
+  .byte 255                             # @LPStart Encoding = omit
+  .byte 255                             # @TType Encoding = omit
+  .byte 1                               # Call site Encoding = uleb128
+  .uleb128 .Lcst_end0-.Lcst_begin0
+.Lcst_begin0:
+  .uleb128 .Lfunc_begin0-.Lfunc_begin0  # >> Call Site 1 <<
+  .uleb128 .Ltmp0-.Lfunc_begin0         #   Call between .Lfunc_begin0 and .Ltmp0
+  .uleb128 .LLP0-.Lfunc_begin0          #   jumps to .LLP0
+  .byte 0                               #   On action: cleanup
+  .uleb128 .Ltmp0-.Lfunc_begin0         # >> Call Site 2 <<
+  .uleb128 .Ltmp1-.Ltmp0                #   Call between .Ltmp0 and .Ltmp1
+  .uleb128 .LLP1-.Lfunc_begin0          #     jumps to .LLP1
+  .byte 0                               #   On action: cleanup
+.Lcst_end0:
+



More information about the llvm-commits mailing list