[llvm] [Bolt] do not search for PLT entries if the relocation is against (PR #69136)

via llvm-commits llvm-commits at lists.llvm.org
Sun Oct 15 19:30:19 PDT 2023


https://github.com/linsinan1995 created https://github.com/llvm/llvm-project/pull/69136

It is legal to have an address of zero with weak references, but Bolt will update the relocation against this symbol to its PLT entry(a bolt-synthetized symbol and has a non-zero address), which leads to wrong runtime behaviour.  

I recently encountered a problem where a segv occurs after using Bolt(crash even just `llvm-bolt app -o app.opt`). It turns out to be related to weak references. e.g.
```
__attribute__((weak)) void undef_weak_fun();

  if (&undef_weak_fun)
    undef_weak_fun();
```
(ref: https://maskray.me/blog/2021-04-25-weak-symbol)

In this case, an undefined weak symbol `undef_weak_fun` has an address of zero, and Bolt incorrectly changes the relocation for the corresponding symbol to symbol at PLT, leading to incorrect runtime behaviour. 

A real-world use case of weak reference: https://github.com/facebook/zstd/commit/6cee3c2c4f031125f487d2aa09c878e52a18fd4e

>From 02d29da797123b4e3d9ce2793b7280e2f44dab08 Mon Sep 17 00:00:00 2001
From: Sinan Lin <sinan.lin at linux.alibaba.com>
Date: Mon, 16 Oct 2023 09:51:48 +0800
Subject: [PATCH] [Bolt] do not search for PLT entries if the relocation is
 against a weak reference symbol.

Take a common weak reference pattern for example
```
__attribute__((weak)) void undef_weak_fun();

  if (&undef_weak_fun)
    undef_weak_fun();
```

In this case, an undefined weak symbol `undef_weak_fun` has an address
of zero, and Bolt incorrectly changes the relocation for the
corresponding symbol to symbol at PLT, leading to incorrect runtime
behavior.
---
 bolt/lib/Rewrite/RewriteInstance.cpp          | 11 +++-
 bolt/test/AArch64/weak-reference-relocation.s | 55 +++++++++++++++++++
 bolt/test/CMakeLists.txt                      |  1 +
 bolt/test/lit.cfg.py                          |  1 +
 llvm/utils/gn/secondary/bolt/test/BUILD.gn    |  1 +
 5 files changed, 68 insertions(+), 1 deletion(-)
 create mode 100644 bolt/test/AArch64/weak-reference-relocation.s

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index ddcc21878abb818..daa9bca1b2a1b4d 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1974,6 +1974,14 @@ bool RewriteInstance::analyzeRelocation(
   if (!Relocation::isSupported(RType))
     return false;
 
+  auto isWeakReference = [](const SymbolRef &Symbol) {
+    Expected<uint32_t> SymFlagsOrErr = Symbol.getFlags();
+    if (!SymFlagsOrErr)
+      return false;
+    return (*SymFlagsOrErr & SymbolRef::SF_Undefined) &&
+           (*SymFlagsOrErr & SymbolRef::SF_Weak);
+  };
+
   const bool IsAArch64 = BC->isAArch64();
 
   const size_t RelSize = Relocation::getSizeForType(RType);
@@ -2005,7 +2013,8 @@ bool RewriteInstance::analyzeRelocation(
     // Section symbols are marked as ST_Debug.
     IsSectionRelocation = (cantFail(Symbol.getType()) == SymbolRef::ST_Debug);
     // Check for PLT entry registered with symbol name
-    if (!SymbolAddress && (IsAArch64 || BC->isRISCV())) {
+    if (!SymbolAddress && !isWeakReference(Symbol) &&
+        (IsAArch64 || BC->isRISCV())) {
       const BinaryData *BD = BC->getPLTBinaryDataByName(SymbolName);
       SymbolAddress = BD ? BD->getAddress() : 0;
     }
diff --git a/bolt/test/AArch64/weak-reference-relocation.s b/bolt/test/AArch64/weak-reference-relocation.s
new file mode 100644
index 000000000000000..5ce0c243e69267e
--- /dev/null
+++ b/bolt/test/AArch64/weak-reference-relocation.s
@@ -0,0 +1,55 @@
+// This test checks whether BOLT can correctly update
+// a symbol that is weak and undefined.
+
+// The assembly code is generated from the source code
+// below by GCC10 with the option `-nostartfiles`.
+// #include <stdlib.h>
+// __attribute__((weak)) void func();
+// void _start() {
+//   if (func)
+//     func();
+//  exit(0);
+// }
+
+# RUN: %clang -Wl,-q -nostartfiles %s -o %t.exe
+# RUN: llvm-bolt %t.exe -o %t.bolt
+# RUN: obj2yaml %t.bolt | FileCheck %s
+
+CHECK:  - Name:            .rodata
+CHECK-NEXT:    Type:            SHT_PROGBITS
+CHECK-NEXT:    Flags:           [ SHF_ALLOC ]
+CHECK-NEXT:    Address:         0x{{0*}}
+CHECK-NEXT:    AddressAlign:    0x{{0*}}
+CHECK-NEXT:    Content:         '{{0+}}'
+
+        .arch armv8-a
+        .text
+        .align  2
+        .global _start
+        .type   _start, %function
+_start:
+.LFB6:
+        .cfi_startproc
+        stp     x29, x30, [sp, -16]!
+        .cfi_def_cfa_offset 16
+        .cfi_offset 29, -16
+        .cfi_offset 30, -8
+        mov     x29, sp
+        adrp    x0, .LC0
+        add     x0, x0, :lo12:.LC0
+        ldr     x0, [x0]
+        cmp     x0, 0
+        beq     .L2
+        bl      func
+.L2:
+        mov     w0, 0
+        bl      exit
+        .cfi_endproc
+.LFE6:
+        .size   _start, .-_start
+        .section        .rodata
+        .align  3
+.LC0:
+        .xword  func
+        .weak   func
+        .section        .note.GNU-stack,"", at progbits
diff --git a/bolt/test/CMakeLists.txt b/bolt/test/CMakeLists.txt
index 89862fd59eb8ec2..e01a17537b2676c 100644
--- a/bolt/test/CMakeLists.txt
+++ b/bolt/test/CMakeLists.txt
@@ -53,6 +53,7 @@ list(APPEND BOLT_TEST_DEPS
   not
   split-file
   yaml2obj
+  obj2yaml
   )
 
 add_custom_target(bolt-test-depends DEPENDS ${BOLT_TEST_DEPS})
diff --git a/bolt/test/lit.cfg.py b/bolt/test/lit.cfg.py
index 3a6da210e01f080..2f07fff7d7e2d4d 100644
--- a/bolt/test/lit.cfg.py
+++ b/bolt/test/lit.cfg.py
@@ -101,6 +101,7 @@
     ToolSubst("llvm-bat-dump", unresolved="fatal"),
     ToolSubst("perf2bolt", unresolved="fatal"),
     ToolSubst("yaml2obj", unresolved="fatal"),
+    ToolSubst("obj2yaml", unresolved="fatal"),
     ToolSubst("llvm-mc", unresolved="fatal"),
     ToolSubst("llvm-nm", unresolved="fatal"),
     ToolSubst("llvm-objdump", unresolved="fatal"),
diff --git a/llvm/utils/gn/secondary/bolt/test/BUILD.gn b/llvm/utils/gn/secondary/bolt/test/BUILD.gn
index d8ef5606248970c..56f8336849625cf 100644
--- a/llvm/utils/gn/secondary/bolt/test/BUILD.gn
+++ b/llvm/utils/gn/secondary/bolt/test/BUILD.gn
@@ -101,6 +101,7 @@ group("test") {
     "//llvm/utils/FileCheck",
     "//llvm/utils/count",
     "//llvm/utils/not",
+    "//llvm/tools/obj2yaml",
   ]
 
   testonly = true



More information about the llvm-commits mailing list