[llvm] [Bolt] fix a wrong relocation update issue with weak references (PR #69136)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 31 05:23:49 PDT 2023
https://github.com/linsinan1995 updated https://github.com/llvm/llvm-project/pull/69136
>From f54c244d5073867b9bd73b73e269545779defb07 Mon Sep 17 00:00:00 2001
From: Sinan Lin <sinan.lin at linux.alibaba.com>
Date: Mon, 16 Oct 2023 09:51:48 +0800
Subject: [PATCH] [Bolt] do not search for PLT entries if the relocation is
against a weak reference symbol.
Take a common weak-reference pattern as an example:
```
__attribute__((weak)) void undef_weak_fun();
if (&undef_weak_fun)
undef_weak_fun();
```
In this case, the undefined weak symbol `undef_weak_fun` has an address
of zero, and Bolt incorrectly redirects the relocation against that
symbol to the symbol's PLT entry, leading to incorrect runtime
behavior.
---
bolt/lib/Rewrite/RewriteInstance.cpp | 11 ++-
.../AArch64/Inputs/weak_reference_func_2.s | 12 +++
bolt/test/AArch64/weak-reference-relocation.s | 89 +++++++++++++++++++
3 files changed, 111 insertions(+), 1 deletion(-)
create mode 100644 bolt/test/AArch64/Inputs/weak_reference_func_2.s
create mode 100644 bolt/test/AArch64/weak-reference-relocation.s
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 0d78c9b75e03d32..c4796bf275e171d 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1980,6 +1980,14 @@ bool RewriteInstance::analyzeRelocation(
if (!Relocation::isSupported(RType))
return false;
+ auto isWeakReference = [](const SymbolRef &Symbol) {
+ Expected<uint32_t> SymFlagsOrErr = Symbol.getFlags();
+ if (!SymFlagsOrErr)
+ return false;
+ return (*SymFlagsOrErr & SymbolRef::SF_Undefined) &&
+ (*SymFlagsOrErr & SymbolRef::SF_Weak);
+ };
+
const bool IsAArch64 = BC->isAArch64();
const size_t RelSize = Relocation::getSizeForType(RType);
@@ -2011,7 +2019,8 @@ bool RewriteInstance::analyzeRelocation(
// Section symbols are marked as ST_Debug.
IsSectionRelocation = (cantFail(Symbol.getType()) == SymbolRef::ST_Debug);
// Check for PLT entry registered with symbol name
- if (!SymbolAddress && (IsAArch64 || BC->isRISCV())) {
+ if (!SymbolAddress && !isWeakReference(Symbol) &&
+ (IsAArch64 || BC->isRISCV())) {
const BinaryData *BD = BC->getPLTBinaryDataByName(SymbolName);
SymbolAddress = BD ? BD->getAddress() : 0;
}
diff --git a/bolt/test/AArch64/Inputs/weak_reference_func_2.s b/bolt/test/AArch64/Inputs/weak_reference_func_2.s
new file mode 100644
index 000000000000000..695afaa4ca0dd50
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/weak_reference_func_2.s
@@ -0,0 +1,12 @@
+ .text
+ .align 2
+ .global func_2
+ .type func_2, %function
+func_2:
+.LFB0:
+ .cfi_startproc
+ nop
+ ret
+ .cfi_endproc
+.LFE0:
+ .size func_2, .-func_2
diff --git a/bolt/test/AArch64/weak-reference-relocation.s b/bolt/test/AArch64/weak-reference-relocation.s
new file mode 100644
index 000000000000000..6be2ffe5f578a6f
--- /dev/null
+++ b/bolt/test/AArch64/weak-reference-relocation.s
@@ -0,0 +1,89 @@
+// This test checks whether BOLT can correctly handle
+// relocations against weak symbols.
+
+// The assembly code is generated from the source code
+// below with GCC10.
+// test-1.c:
+// #include <stdlib.h>
+// __attribute__((weak)) void func_1();
+// __attribute__((weak)) void func_2();
+//
+// void _start() {
+// if (&func_1)
+// func_1();
+// if (&func_2)
+// func_2();
+// exit0();
+// }
+//
+// test-2.c:
+// void func_2() {}
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %S/Inputs/weak_reference_func_2.s -o %t_func2.o
+# RUN: %clang -Wl,-q -nostartfiles %t.o %t_func2.o -o %t.exe
+# RUN: llvm-bolt %t.exe -o %t.bolt
+# RUN: llvm-nm -n %t.bolt > %t.out.txt
+# RUN: llvm-objdump -dj .rodata %t.bolt >> %t.out.txt
+# RUN: FileCheck %s --input-file=%t.out.txt
+
+# CHECK: {{0+}}[[#%x,ADDR:]] T func_2
+
+# CHECK: {{.*}} <.rodata>:
+# CHECK-NEXT: {{.*}} .word 0x{{[0]+}}[[#ADDR]]
+# CHECK-NEXT: {{.*}} .word 0x00000000
+
+ .text
+ .align 2
+ .weak func_1
+ .type func_1, %function
+func_1:
+.LFB0:
+ .cfi_startproc
+ nop
+ ret
+ .cfi_endproc
+.LFE0:
+ .size func_1, .-func_1
+ .align 2
+ .global exit0
+ .type exit0, %function
+exit0:
+.LFB1:
+ .cfi_startproc
+ mov x0, 0
+ mov w8, #93
+ svc #0
+ nop
+ ret
+ .cfi_endproc
+.LFE1:
+ .size exit0, .-exit0
+ .align 2
+ .global _start
+ .type _start, %function
+_start:
+.LFB2:
+ .cfi_startproc
+ stp x29, x30, [sp, -16]!
+ mov x29, sp
+ bl func_1
+ adrp x0, .LC0
+ add x0, x0, :lo12:.LC0
+ ldr x0, [x0]
+ cmp x0, 0
+ beq .L4
+ bl func_2
+.L4:
+ bl exit0
+ nop
+ ldp x29, x30, [sp], 16
+ ret
+ .cfi_endproc
+.LFE2:
+ .size _start, .-_start
+ .section .rodata
+ .align 3
+.LC0:
+ .xword func_2
+ .weak func_2
More information about the llvm-commits
mailing list