[llvm] [Bolt] fix a wrong relocation update issue with weak references (PR #69136)

via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 31 06:58:59 PDT 2023


https://github.com/linsinan1995 updated https://github.com/llvm/llvm-project/pull/69136

>From 345b857cd0d11df4cb010134d2a2694f8d39b714 Mon Sep 17 00:00:00 2001
From: Sinan Lin <sinan.lin at linux.alibaba.com>
Date: Mon, 16 Oct 2023 09:51:48 +0800
Subject: [PATCH] [Bolt] do not search for PLT entries if the relocation is
 against a weak reference symbol.

Consider the following common weak-reference pattern:
```
__attribute__((weak)) void undef_weak_fun();

  if (&undef_weak_fun)
    undef_weak_fun();
```

In this case, the undefined weak symbol `undef_weak_fun` has an address
of zero. Because the address is zero, BOLT searches for a PLT entry
registered under the same symbol name and incorrectly updates the
relocation to point at that PLT entry, leading to incorrect runtime
behavior.
---
 bolt/lib/Rewrite/RewriteInstance.cpp          | 11 ++-
 .../AArch64/Inputs/weak_reference_func_2.s    | 12 +++
 bolt/test/AArch64/weak-reference-relocation.s | 89 +++++++++++++++++++
 3 files changed, 111 insertions(+), 1 deletion(-)
 create mode 100644 bolt/test/AArch64/Inputs/weak_reference_func_2.s
 create mode 100644 bolt/test/AArch64/weak-reference-relocation.s

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 0d78c9b75e03d32..c4796bf275e171d 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1980,6 +1980,14 @@ bool RewriteInstance::analyzeRelocation(
   if (!Relocation::isSupported(RType))
     return false;
 
+  auto isWeakReference = [](const SymbolRef &Symbol) {
+    Expected<uint32_t> SymFlagsOrErr = Symbol.getFlags();
+    if (!SymFlagsOrErr)
+      return false;
+    return (*SymFlagsOrErr & SymbolRef::SF_Undefined) &&
+           (*SymFlagsOrErr & SymbolRef::SF_Weak);
+  };
+
   const bool IsAArch64 = BC->isAArch64();
 
   const size_t RelSize = Relocation::getSizeForType(RType);
@@ -2011,7 +2019,8 @@ bool RewriteInstance::analyzeRelocation(
     // Section symbols are marked as ST_Debug.
     IsSectionRelocation = (cantFail(Symbol.getType()) == SymbolRef::ST_Debug);
     // Check for PLT entry registered with symbol name
-    if (!SymbolAddress && (IsAArch64 || BC->isRISCV())) {
+    if (!SymbolAddress && !isWeakReference(Symbol) &&
+        (IsAArch64 || BC->isRISCV())) {
       const BinaryData *BD = BC->getPLTBinaryDataByName(SymbolName);
       SymbolAddress = BD ? BD->getAddress() : 0;
     }
diff --git a/bolt/test/AArch64/Inputs/weak_reference_func_2.s b/bolt/test/AArch64/Inputs/weak_reference_func_2.s
new file mode 100644
index 000000000000000..695afaa4ca0dd50
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/weak_reference_func_2.s
@@ -0,0 +1,12 @@
+    .text
+    .align  2
+    .global func_2
+    .type   func_2, %function
+func_2:
+.LFB0:
+    .cfi_startproc
+    nop
+    ret
+    .cfi_endproc
+.LFE0:
+    .size   func_2, .-func_2
diff --git a/bolt/test/AArch64/weak-reference-relocation.s b/bolt/test/AArch64/weak-reference-relocation.s
new file mode 100644
index 000000000000000..fc2970ecbf0e949
--- /dev/null
+++ b/bolt/test/AArch64/weak-reference-relocation.s
@@ -0,0 +1,89 @@
+// This test checks whether BOLT can correctly handle
+// relocations against weak symbols.
+
+// The assembly code is generated from the source code
+// below with GCC10.
+// test-1.c:
+// #include <stdlib.h>
+// __attribute__((weak)) void func_1();
+// __attribute__((weak)) void func_2();
+//
+// void _start() {
+//   if (&func_1)
+//     func_1();
+//   if (&func_2)
+//     func_2();
+//   exit0();
+// }
+//
+// test-2.c:
+// void func_2() {}
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %S/Inputs/weak_reference_func_2.s -o %t_func2.o
+# RUN: %clang -Wl,-q -nostartfiles -fuse-ld=lld %t.o %t_func2.o -o %t.exe
+# RUN: llvm-bolt %t.exe -o %t.bolt
+# RUN: llvm-nm -n %t.bolt > %t.out.txt
+# RUN: llvm-objdump -dj .rodata %t.bolt >> %t.out.txt
+# RUN: FileCheck %s --input-file=%t.out.txt
+
+# CHECK: {{0+}}[[#%x,ADDR:]] T func_2
+
+# CHECK: {{.*}} <.rodata>:
+# CHECK-NEXT: {{.*}} .word 0x{{[0]+}}[[#ADDR]]
+# CHECK-NEXT: {{.*}} .word 0x00000000
+
+        .text
+        .align  2
+        .weak   func_1
+        .type   func_1, %function
+func_1:
+.LFB0:
+        .cfi_startproc
+        nop
+        ret
+        .cfi_endproc
+.LFE0:
+        .size   func_1, .-func_1
+        .align  2
+        .global exit0
+        .type   exit0, %function
+exit0:
+.LFB1:
+        .cfi_startproc
+        mov     x0, 0
+        mov     w8, #93
+        svc     #0
+        nop
+        ret
+        .cfi_endproc
+.LFE1:
+        .size   exit0, .-exit0
+        .align  2
+        .global _start
+        .type   _start, %function
+_start:
+.LFB2:
+        .cfi_startproc
+        stp     x29, x30, [sp, -16]!
+        mov     x29, sp
+        bl      func_1
+        adrp    x0, .LC0
+        add     x0, x0, :lo12:.LC0
+        ldr     x0, [x0]
+        cmp     x0, 0
+        beq     .L4
+        bl      func_2
+.L4:
+        bl      exit0
+        nop
+        ldp     x29, x30, [sp], 16
+        ret
+        .cfi_endproc
+.LFE2:
+        .size   _start, .-_start
+        .section        .rodata
+        .align  3
+.LC0:
+        .xword  func_2
+        .weak   func_2



More information about the llvm-commits mailing list