[lld] f21704e - [LLD][PowerPC] Fix bug in PC-Relative initial exec
Stefan Pintilie via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 22 11:16:07 PDT 2021
Author: Stefan Pintilie
Date: 2021-03-22T13:15:44-05:00
New Revision: f21704e080a04580ef837822244a624c6e2e7cac
URL: https://github.com/llvm/llvm-project/commit/f21704e080a04580ef837822244a624c6e2e7cac
DIFF: https://github.com/llvm/llvm-project/commit/f21704e080a04580ef837822244a624c6e2e7cac.diff
LOG: [LLD][PowerPC] Fix bug in PC-Relative initial exec
There is a bug when initial exec is relaxed to local exec.
In the following situation:
InitExec.c
```
extern __thread unsigned TGlobal;
unsigned getConst(unsigned*);
unsigned addVal(unsigned, unsigned*);
unsigned GetAddrT() {
return addVal(getConst(&TGlobal), &TGlobal);
}
```
Def.c
```
__thread unsigned TGlobal;
unsigned getConst(unsigned* A) {
return *A + 3;
}
unsigned addVal(unsigned A, unsigned* B) {
return A + *B;
}
```
The problem is in InitExec.c but Def.c is required if you want to link the example and see the problem.
To compile everything:
```
clang -O3 -mcpu=pwr10 -c InitExec.c
clang -O3 -mcpu=pwr10 -c Def.c
ld.lld InitExec.o Def.o -o IeToLe
```
If you objdump the problem object file:
```
$ llvm-objdump -dr --mcpu=pwr10 InitExec.o
```
you will get the following assembly:
```
0000000000000000 <GetAddrT>:
0: a6 02 08 7c mflr 0
4: f0 ff c1 fb std 30, -16(1)
8: 10 00 01 f8 std 0, 16(1)
c: d1 ff 21 f8 stdu 1, -48(1)
10: 00 00 10 04 00 00 60 e4 pld 3, 0(0), 1
0000000000000010: R_PPC64_GOT_TPREL_PCREL34 TGlobal
18: 14 6a c3 7f add 30, 3, 13
0000000000000019: R_PPC64_TLS TGlobal
1c: 78 f3 c3 7f mr 3, 30
20: 01 00 00 48 bl 0x20
0000000000000020: R_PPC64_REL24_NOTOC getConst
24: 78 f3 c4 7f mr 4, 30
28: 30 00 21 38 addi 1, 1, 48
2c: 10 00 01 e8 ld 0, 16(1)
30: f0 ff c1 eb ld 30, -16(1)
34: a6 03 08 7c mtlr 0
38: 00 00 00 48 b 0x38
0000000000000038: R_PPC64_REL24_NOTOC addVal
```
The lines of interest are:
```
10: 00 00 10 04 00 00 60 e4 pld 3, 0(0), 1
0000000000000010: R_PPC64_GOT_TPREL_PCREL34 TGlobal
18: 14 6a c3 7f add 30, 3, 13
0000000000000019: R_PPC64_TLS TGlobal
1c: 78 f3 c3 7f mr 3, 30
```
Which once linked gets turned into:
```
10010210: ff ff 03 06 00 90 6d 38 paddi 3, 13, -28672, 0
10010218: 00 00 00 60 nop
1001021c: 78 f3 c3 7f mr 3, 30
```
The problem is that register 30 is never set after the optimization.
Therefore it is not correct to relax the above instructions by replacing
the add instruction with a nop.
Instead the add instruction should be replaced with a copy (mr) instruction.
If the add uses the same register as input and as output then it is safe to
continue to replace the add with a nop.
Reviewed By: MaskRay
Differential Revision: https://reviews.llvm.org/D95262
Added:
lld/test/ELF/ppc64-tls-le-relax.s
Modified:
lld/ELF/Arch/PPC64.cpp
lld/test/ELF/ppc64-tls-pcrel-ie.s
Removed:
################################################################################
diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
index 03ecc811b2cf1..a0c2d1617caa2 100644
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -920,7 +920,15 @@ void PPC64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
// that comes before it will already have computed the address of the
// symbol.
if (secondaryOp == 266) {
- write32(loc - 1, NOP);
+ // Check if the add uses the same result register as the input register.
+ uint32_t rt = (tlsInstr & 0x03E00000) >> 21; // bits 6-10
+ uint32_t ra = (tlsInstr & 0x001F0000) >> 16; // bits 11-15
+ if (ra == rt) {
+ write32(loc - 1, NOP);
+ } else {
+ // mr rt, ra
+ write32(loc - 1, 0x7C000378 | (rt << 16) | (ra << 21) | (ra << 11));
+ }
} else {
uint32_t dFormOp = getPPCDFormOp(secondaryOp);
if (dFormOp == 0)
diff --git a/lld/test/ELF/ppc64-tls-le-relax.s b/lld/test/ELF/ppc64-tls-le-relax.s
new file mode 100644
index 0000000000000..3c3bc33a65b73
--- /dev/null
+++ b/lld/test/ELF/ppc64-tls-le-relax.s
@@ -0,0 +1,81 @@
+# REQUIRES: ppc
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=powerpc64le %t/initexec -o %t/initexec.o
+# RUN: llvm-mc -filetype=obj -triple=powerpc64le %t/defs -o %t/defs.o
+# RUN: ld.lld %t/initexec.o %t/defs.o -o %t/out
+# RUN: llvm-objdump -d --mcpu=pwr10 --no-show-raw-insn %t/out | FileCheck %s
+
+# CHECK-LABEL: <GetAddrT>:
+# CHECK: mflr 0
+# CHECK-NEXT: std 30, -16(1)
+# CHECK-NEXT: std 0, 16(1)
+# CHECK-NEXT: stdu 1, -48(1)
+# CHECK-NEXT: paddi 3, 13, -28672, 0
+# CHECK-NEXT: mr 30, 3
+# CHECK-NEXT: mr 3, 30
+# CHECK-NEXT: bl
+# CHECK-NEXT: mr 4, 30
+# CHECK-NEXT: addi 1, 1, 48
+# CHECK-NEXT: ld 0, 16(1)
+# CHECK-NEXT: ld 30, -16(1)
+# CHECK-NEXT: mtlr 0
+# CHECK-NEXT: b
+
+## Generated From:
+## extern __thread unsigned TGlobal;
+## unsigned getConst(unsigned*);
+## unsigned addVal(unsigned, unsigned*);
+##
+## unsigned GetAddrT() {
+## return addVal(getConst(&TGlobal), &TGlobal);
+## }
+
+//--- initexec
+GetAddrT:
+ mflr 0
+ std 30, -16(1)
+ std 0, 16(1)
+ stdu 1, -48(1)
+ pld 3, TGlobal@got@tprel@pcrel(0), 1
+ add 30, 3, TGlobal@tls@pcrel
+ mr 3, 30
+ bl getConst@notoc
+ mr 4, 30
+ addi 1, 1, 48
+ ld 0, 16(1)
+ ld 30, -16(1)
+ mtlr 0
+ b addVal@notoc
+
+## Generated From:
+## __thread unsigned TGlobal;
+##
+## unsigned getConst(unsigned* A) {
+## return *A + 3;
+## }
+##
+## unsigned addVal(unsigned A, unsigned* B) {
+## return A + *B;
+## }
+
+//--- defs
+.globl getConst
+getConst:
+ lwz 3, 0(3)
+ addi 3, 3, 3
+ clrldi 3, 3, 32
+ blr
+
+.globl addVal
+addVal:
+ lwz 4, 0(4)
+ add 3, 4, 3
+ clrldi 3, 3, 32
+ blr
+
+.section .tbss,"awT",@nobits
+.globl TGlobal
+.p2align 2
+TGlobal:
+ .long 0
+ .size TGlobal, 4
diff --git a/lld/test/ELF/ppc64-tls-pcrel-ie.s b/lld/test/ELF/ppc64-tls-pcrel-ie.s
index 93a286a242372..eaa8619086018 100644
--- a/lld/test/ELF/ppc64-tls-pcrel-ie.s
+++ b/lld/test/ELF/ppc64-tls-pcrel-ie.s
@@ -54,9 +54,9 @@ y:
# LE-RELOC: There are no relocations in this file.
-# LE-SYM: Symbol table '.symtab' contains 7 entries:
-# LE-SYM: 5: 0000000000000000 0 TLS GLOBAL DEFAULT 6 x
-# LE-SYM: 6: 0000000000000004 0 TLS GLOBAL DEFAULT 6 y
+# LE-SYM: Symbol table '.symtab' contains 8 entries:
+# LE-SYM: 6: 0000000000000000 0 TLS GLOBAL DEFAULT 6 x
+# LE-SYM: 7: 0000000000000004 0 TLS GLOBAL DEFAULT 6 y
# LE-GOT: could not find section '.got'
@@ -74,6 +74,20 @@ IEAddr:
add 3, 3, x@tls@pcrel
blr
+# IE-LABEL: <IEAddrCopy>:
+# IE-NEXT: pld 3, 12488(0), 1
+# IE-NEXT: add 4, 3, 13
+# IE-NEXT: blr
+# LE-LABEL: <IEAddrCopy>:
+# LE-NEXT: paddi 3, 13, -28672, 0
+# LE-NEXT: mr 4, 3
+# LE-NEXT: blr
+.section .text_addr, "ax", %progbits
+IEAddrCopy:
+ pld 3, x@got@tprel@pcrel(0), 1
+ add 4, 3, x@tls@pcrel
+ blr
+
# IE-LABEL: <IEVal>:
# IE-NEXT: pld 3, 8408(0), 1
# IE-NEXT: lwzx 3, 3, 13
More information about the llvm-commits
mailing list