[lld] [lld][LoongArch] Partially fix the handling of R_LARCH_PCALA64_* relocs (PR #73387)

Lu Weining via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 26 06:58:48 PST 2023


https://github.com/SixWeining updated https://github.com/llvm/llvm-project/pull/73387

>From 8dff73e7f11f928307ea7883bcdd8ea44cf31eac Mon Sep 17 00:00:00 2001
From: Weining Lu <luweining at loongson.cn>
Date: Sat, 25 Nov 2023 16:45:03 +0800
Subject: [PATCH 1/2] [lld][LoongArch] Partially fix the handling of
 R_LARCH_PCALA64_* relocs

Defer the computation of `negativeB` because adding 0x1000 to the original
`result` may yield a different `negativeB` value. Actually this issue
was first reported by @rui314 at https://reviews.llvm.org/D138135#4568594.

Note that even with this patch, the handling of R_LARCH_PCALA64_*
relocs is NOT totally correct, because the current approach assumes those
four instructions (pcalau12i/addi.d/lu32i.d/lu52i.d) are in the same
4K-page, which is not always true. It is possible to document this
assumption as a constraint in the psABI in the future. But at least this
patch is necessary.

See https://github.com/llvm/llvm-project/pull/71907 and
https://github.com/loongson-community/discussions/issues/17 for details.
---
 lld/ELF/Arch/LoongArch.cpp          |  3 +--
 lld/test/ELF/loongarch-pc-aligned.s | 22 ++++++++++------------
 2 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index 1c3e015efc1649a..9291ff14d674b64 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -157,10 +157,9 @@ uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc) {
   //     result = page(dest) - page(pc) + 0x1000
   uint64_t result = getLoongArchPage(dest) - getLoongArchPage(pc);
   bool negativeA = lo12(dest) > 0x7ff;
-  bool negativeB = (result & 0x8000'0000) != 0;
-
   if (negativeA)
     result += 0x1000;
+  bool negativeB = (result & 0x8000'0000) != 0;
   if (negativeA && !negativeB)
     result -= 0x10000'0000;
   else if (!negativeA && negativeB)
diff --git a/lld/test/ELF/loongarch-pc-aligned.s b/lld/test/ELF/loongarch-pc-aligned.s
index e7950400a5c8c45..742ac502e799804 100644
--- a/lld/test/ELF/loongarch-pc-aligned.s
+++ b/lld/test/ELF/loongarch-pc-aligned.s
@@ -260,31 +260,29 @@
 # EXTREME15-NEXT: lu32i.d   $t0, -349526
 # EXTREME15-NEXT: lu52i.d   $t0, $t0, -1093
 
-## FIXME: Correct %pc64_lo20 should be 0xfffff (-1) and %pc64_hi12 should be 0xfff (-1), but current values are:
-## page delta = 0x0000000000000000, page offset = 0x888
+## page delta = 0xffffffff00000000, page offset = 0x888
 ## %pc_lo12   = 0x888 = -1912
 ## %pc_hi20   = 0x00000 = 0
-## %pc64_lo20 = 0x00000 = 0
-## %pc64_hi12 = 0x00000 = 0
+## %pc64_lo20 = 0xfffff = -1
+## %pc64_hi12 = 0xfff = -1
 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x0000000012344888 --section-start=.text=0x0000000012345678 -o %t/extreme16
 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme16 | FileCheck %s --check-prefix=EXTREME16
 # EXTREME16:      addi.d $t0, $zero, -1912
 # EXTREME16-NEXT: pcalau12i $t1, 0
-# EXTREME16-NEXT: lu32i.d   $t0, 0
-# EXTREME16-NEXT: lu52i.d   $t0, $t0, 0
+# EXTREME16-NEXT: lu32i.d   $t0, -1
+# EXTREME16-NEXT: lu52i.d   $t0, $t0, -1
 
-## FIXME: Correct %pc64_lo20 should be 0x00000 (0) and %pc64_hi12 should be 0x000 (0), but current values are:
-## page delta = 0xffffffff80000000, page offset = 0x888
+## page delta = 0x0000000080000000, page offset = 0x888
 ## %pc_lo12   = 0x888 = -1912
 ## %pc_hi20   = 0x80000 = -524288
-## %pc64_lo20 = 0xfffff = -1
-## %pc64_hi12 = 0xfff = -1
+## %pc64_lo20 = 0x00000 = 0
+## %pc64_hi12 = 0x000 = 0
 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x000071238ffff888 --section-start=.text=0x0000712310000678 -o %t/extreme17
 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme17 | FileCheck %s --check-prefix=EXTREME17
 # EXTREME17:      addi.d $t0, $zero, -1912
 # EXTREME17-NEXT: pcalau12i $t1, -524288
-# EXTREME17-NEXT: lu32i.d   $t0, -1
-# EXTREME17-NEXT: lu52i.d   $t0, $t0, -1
+# EXTREME17-NEXT: lu32i.d   $t0, 0
+# EXTREME17-NEXT: lu52i.d   $t0, $t0, 0
 
 #--- a.s
 .rodata

>From bd9849c41aa97d99a0eee8673d0d223039d5d4d5 Mon Sep 17 00:00:00 2001
From: Weining Lu <luweining at loongson.cn>
Date: Sun, 26 Nov 2023 22:46:54 +0800
Subject: [PATCH 2/2] Use the same logic as bfd and mold

---
 lld/ELF/Arch/LoongArch.cpp | 89 ++++----------------------------------
 1 file changed, 9 insertions(+), 80 deletions(-)

diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index 9291ff14d674b64..4bbf58242a06400 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -83,87 +83,16 @@ static uint32_t lo12(uint32_t val) { return val & 0xfff; }
 
 // Calculate the adjusted page delta between dest and PC.
 uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc) {
-  // Consider the large code model access pattern, of which the smaller code
-  // models' access patterns are a subset:
-  //
-  //     pcalau12i       U, %foo_hi20(sym)        ; b in [-0x80000, 0x7ffff]
-  //     addi.d          T, zero, %foo_lo12(sym)  ; a in [-0x800, 0x7ff]
-  //     lu32i.d         T, %foo64_lo20(sym)      ; c in [-0x80000, 0x7ffff]
-  //     lu52i.d         T, T, %foo64_hi12(sym)   ; d in [-0x800, 0x7ff]
-  //     {ldx,stx,add}.* dest, U, T
-  //
-  // Let page(pc) = 0xRRR'QQQQQ'PPPPP'000 and dest = 0xZZZ'YYYYY'XXXXX'AAA,
-  // with RQ, P, ZY, X and A representing the respective bitfields as unsigned
-  // integers. We have:
-  //
-  //     page(dest) = 0xZZZ'YYYYY'XXXXX'000
-  //     - page(pc) = 0xRRR'QQQQQ'PPPPP'000
-  //     ----------------------------------
-  //                  0xddd'ccccc'bbbbb'000
-  //
-  // Now consider the above pattern's actual effects:
-  //
-  //     page(pc)                     0xRRR'QQQQQ'PPPPP'000
-  //     pcalau12i                  + 0xiii'iiiii'bbbbb'000
-  //     addi                       + 0xjjj'jjjjj'kkkkk'AAA
-  //     lu32i.d & lu52i.d          + 0xddd'ccccc'00000'000
-  //     --------------------------------------------------
-  //     dest = U + T
-  //          = ((RQ<<32) + (P<<12) + i + (b<<12)) + (j + k + A + (cd<<32))
-  //          = (((RQ+cd)<<32) + i + j) + (((P+b)<<12) + k) + A
-  //          = (ZY<<32)                + (X<<12)           + A
-  //
-  //     ZY<<32 = (RQ<<32)+(cd<<32)+i+j, X<<12 = (P<<12)+(b<<12)+k
-  //     cd<<32 = (ZY<<32)-(RQ<<32)-i-j, b<<12 = (X<<12)-(P<<12)-k
-  //
-  // where i and k are terms representing the effect of b's and A's sign
-  // extension respectively.
-  //
-  //     i = signed b < 0 ? -0x10000'0000 : 0
-  //     k = signed A < 0 ? -0x1000 : 0
-  //
-  // The j term is a bit complex: it represents the higher half of
-  // sign-extended bits from A that are effectively lost if i == 0 but k != 0,
-  // due to overwriting by lu32i.d & lu52i.d.
-  //
-  //     j = signed A < 0 && signed b >= 0 ? 0x10000'0000 : 0
-  //
-  // The actual effect of the instruction sequence before the final addition,
-  // i.e. our desired result value, is thus:
-  //
-  //     result = (cd<<32) + (b<<12)
-  //            = (ZY<<32)-(RQ<<32)-i-j + (X<<12)-(P<<12)-k
-  //            = ((ZY<<32)+(X<<12)) - ((RQ<<32)+(P<<12)) - i - j - k
-  //            = page(dest) - page(pc) - i - j - k
-  //
-  // when signed A >= 0 && signed b >= 0:
-  //
-  //     i = j = k = 0
-  //     result = page(dest) - page(pc)
-  //
-  // when signed A >= 0 && signed b < 0:
-  //
-  //     i = -0x10000'0000, j = k = 0
-  //     result = page(dest) - page(pc) + 0x10000'0000
-  //
-  // when signed A < 0 && signed b >= 0:
-  //
-  //     i = 0, j = 0x10000'0000, k = -0x1000
-  //     result = page(dest) - page(pc) - 0x10000'0000 + 0x1000
-  //
-  // when signed A < 0 && signed b < 0:
-  //
-  //     i = -0x10000'0000, j = 0, k = -0x1000
-  //     result = page(dest) - page(pc) + 0x1000
+  // Compensating all the sign-extensions is a bit complicated.
+  // Just use the same logic as bfd and mold. Note that this algorithm assumes
+  // those four instructions (pcalau12i/addi.d/lu32i.d/lu52i.d) are in the same
+  // 4K-page. This assumption is expected to be documented as a constraint in
+  // psABI in future.
   uint64_t result = getLoongArchPage(dest) - getLoongArchPage(pc);
-  bool negativeA = lo12(dest) > 0x7ff;
-  if (negativeA)
-    result += 0x1000;
-  bool negativeB = (result & 0x8000'0000) != 0;
-  if (negativeA && !negativeB)
-    result -= 0x10000'0000;
-  else if (!negativeA && negativeB)
-    result += 0x10000'0000;
+  if (dest & 0x800)
+    result += 0x1000 - 0x1'0000'0000;
+  if (result & 0x8000'0000)
+    result += 0x1'0000'0000;
   return result;
 }
 



More information about the llvm-commits mailing list