[lld] f77b77e - [ELF][RISCV] Relax local-exec TLS model

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 15 10:08:12 PDT 2022


Author: Fangrui Song
Date: 2022-07-15T10:08:08-07:00
New Revision: f77b77e8dbeb26764349a71b35f21b701adeee88

URL: https://github.com/llvm/llvm-project/commit/f77b77e8dbeb26764349a71b35f21b701adeee88
DIFF: https://github.com/llvm/llvm-project/commit/f77b77e8dbeb26764349a71b35f21b701adeee88.diff

LOG: [ELF][RISCV] Relax local-exec TLS model

In -mrelax mode, GCC/Clang may generate a local-exec TLS code sequence like:
```
# R_RISCV_TPREL_HI20, R_RISCV_RELAX
lui rd, %tprel_hi(x)
# R_RISCV_TPREL_ADD, R_RISCV_RELAX
add rd, rd, tp, %tprel_add(x)
# (R_RISCV_TPREL_LO12_I || R_RISCV_TPREL_LO12_S), R_RISCV_RELAX
addi rd, rd, %tprel_lo(x) || sw rs, %tprel_lo(x)(rd)
```

Note: st_value(x) for TLS should be in the range [0,p_memsz(PT_TLS)).
When st_value(x) < 2048 (i.e. hi20(x) == 0), the linker can relax the code
sequence to:
```
addi rd, tp, st_value(x) || sw rs, st_value(x)(tp)
```

Differential Revision: https://reviews.llvm.org/D129425

Added: 
    

Modified: 
    lld/ELF/Arch/RISCV.cpp
    lld/test/ELF/riscv-tls-le.s

Removed: 
    


################################################################################
diff  --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
index c09bb2e607863..8fca1a686a79c 100644
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -57,6 +57,7 @@ enum Op {
 
 enum Reg {
   X_RA = 1,
+  X_TP = 4,
   X_T0 = 5,
   X_T1 = 6,
   X_T2 = 7,
@@ -76,6 +77,19 @@ static uint32_t utype(uint32_t op, uint32_t rd, uint32_t imm) {
   return op | (rd << 7) | (imm << 12);
 }
 
+// Extract bits v[begin:end] (high index first, inclusive); begin must be < 63.
+static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
+  return (v & ((1ULL << (begin + 1)) - 1)) >> end;
+}
+
+static uint32_t setLO12_I(uint32_t insn, uint32_t imm) {
+  return (insn & 0xfffff) | (imm << 20); // keep insn[19:0]; imm -> insn[31:20]
+}
+static uint32_t setLO12_S(uint32_t insn, uint32_t imm) {
+  return (insn & 0x1fff07f) | (extractBits(imm, 11, 5) << 25) | // imm[11:5]
+         (extractBits(imm, 4, 0) << 7); // imm[4:0] -> insn[11:7]
+}
+
 RISCV::RISCV() {
   copyRel = R_RISCV_COPY;
   pltRel = R_RISCV_JUMP_SLOT;
@@ -270,10 +284,9 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s,
   case R_RISCV_TPREL_LO12_I:
   case R_RISCV_TPREL_LO12_S:
     return R_TPREL;
-  case R_RISCV_TPREL_ADD:
-    return R_NONE;
   case R_RISCV_ALIGN:
     return R_RELAX_HINT;
+  case R_RISCV_TPREL_ADD:
   case R_RISCV_RELAX:
     return config->relax ? R_RELAX_HINT : R_NONE;
   default:
@@ -283,11 +296,6 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s,
   }
 }
 
-// Extract bits V[Begin:End], where range is inclusive, and Begin must be < 63.
-static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
-  return (v & ((1ULL << (begin + 1)) - 1)) >> end;
-}
-
 void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
   const unsigned bits = config->wordsize * 8;
 
@@ -404,7 +412,7 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
   case R_RISCV_LO12_I: {
     uint64_t hi = (val + 0x800) >> 12;
     uint64_t lo = val - (hi << 12);
-    write32le(loc, (read32le(loc) & 0xFFFFF) | ((lo & 0xFFF) << 20));
+    write32le(loc, setLO12_I(read32le(loc), lo & 0xfff));
     return;
   }
 
@@ -413,9 +421,7 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
   case R_RISCV_LO12_S: {
     uint64_t hi = (val + 0x800) >> 12;
     uint64_t lo = val - (hi << 12);
-    uint32_t imm11_5 = extractBits(lo, 11, 5) << 25;
-    uint32_t imm4_0 = extractBits(lo, 4, 0) << 7;
-    write32le(loc, (read32le(loc) & 0x1FFF07F) | imm11_5 | imm4_0);
+    write32le(loc, setLO12_S(read32le(loc), lo));
     return;
   }
 
@@ -567,6 +573,35 @@ static void relaxCall(const InputSection &sec, size_t i, uint64_t loc,
   }
 }
 
+// Relax the local-exec TLS code sequence when hi20 of the symbol value is zero.
+static void relaxTlsLe(const InputSection &sec, size_t i, uint64_t loc,
+                       Relocation &r, uint32_t &remove) {
+  uint64_t val = r.sym->getVA(r.addend);
+  if (hi20(val) != 0)
+    return;
+  uint32_t insn = read32le(sec.rawData.data() + r.offset);
+  switch (r.type) {
+  case R_RISCV_TPREL_HI20:
+  case R_RISCV_TPREL_ADD:
+    // Remove lui rd, %tprel_hi(x) and add rd, rd, tp, %tprel_add(x).
+    sec.relaxAux->relocTypes[i] = R_RISCV_RELAX;
+    remove = 4;
+    break;
+  case R_RISCV_TPREL_LO12_I:
+    // addi rd, rd, %tprel_lo(x) => addi rd, tp, st_value(x)
+    sec.relaxAux->relocTypes[i] = R_RISCV_32;
+    insn = (insn & ~(31 << 15)) | (X_TP << 15); // base register becomes tp
+    sec.relaxAux->writes.push_back(setLO12_I(insn, val));
+    break;
+  case R_RISCV_TPREL_LO12_S:
+    // sw rs, %tprel_lo(x)(rd) => sw rs, st_value(x)(tp)
+    sec.relaxAux->relocTypes[i] = R_RISCV_32;
+    insn = (insn & ~(31 << 15)) | (X_TP << 15); // base register becomes tp
+    sec.relaxAux->writes.push_back(setLO12_S(insn, val));
+    break;
+  }
+}
+
 static bool relax(InputSection &sec) {
   const uint64_t secAddr = sec.getVA();
   auto &aux = *sec.relaxAux;
@@ -612,6 +647,14 @@ static bool relax(InputSection &sec) {
           sec.relocations[i + 1].type == R_RISCV_RELAX)
         relaxCall(sec, i, loc, r, remove);
       break;
+    case R_RISCV_TPREL_HI20:
+    case R_RISCV_TPREL_ADD:
+    case R_RISCV_TPREL_LO12_I:
+    case R_RISCV_TPREL_LO12_S:
+      if (i + 1 != sec.relocations.size() &&
+          sec.relocations[i + 1].type == R_RISCV_RELAX)
+        relaxTlsLe(sec, i, loc, r, remove);
+      break;
     }
 
     // For all anchors whose offsets are <= r.offset, they are preceded by
@@ -697,7 +740,7 @@ void elf::riscvFinalizeRelax(int passes) {
       for (size_t i = 0, e = rels.size(); i != e; ++i) {
         uint32_t remove = aux.relocDeltas[i] - delta;
         delta = aux.relocDeltas[i];
-        if (remove == 0)
+        if (remove == 0 && aux.relocTypes[i] == R_RISCV_NONE)
           continue;
 
         // Copy from last location to the current relocated location.
@@ -723,15 +766,24 @@ void elf::riscvFinalizeRelax(int passes) {
             }
           }
         } else if (RelType newType = aux.relocTypes[i]) {
-          const uint32_t insn = aux.writes[writesIdx++];
           switch (newType) {
+          case R_RISCV_RELAX:
+            // Used by relaxTlsLe to indicate the relocation is ignored.
+            break;
           case R_RISCV_RVC_JUMP:
             skip = 2;
-            write16le(p, insn);
+            write16le(p, aux.writes[writesIdx++]);
             break;
           case R_RISCV_JAL:
             skip = 4;
-            write32le(p, insn);
+            write32le(p, aux.writes[writesIdx++]);
+            break;
+          case R_RISCV_32:
+            // Used by relaxTlsLe to write a uint32_t then suppress the handling
+            // in relocateAlloc.
+            skip = 4;
+            write32le(p, aux.writes[writesIdx++]);
+            aux.relocTypes[i] = R_RISCV_NONE;
             break;
           default:
             llvm_unreachable("unsupported type");

diff  --git a/lld/test/ELF/riscv-tls-le.s b/lld/test/ELF/riscv-tls-le.s
index 96a10e940218d..752c126151489 100644
--- a/lld/test/ELF/riscv-tls-le.s
+++ b/lld/test/ELF/riscv-tls-le.s
@@ -1,48 +1,91 @@
 # REQUIRES: riscv
 
+## Additionally test that (a) -no-pie/-pie have the same behavior
+## (b) --no-relax/--relax have the same behavior when R_RISCV_RELAX is suppressed.
 # RUN: llvm-mc -filetype=obj -triple=riscv32 %s -o %t.32.o
-# RUN: ld.lld %t.32.o -o %t.32
+# RUN: ld.lld --relax %t.32.o -o %t.32
 # RUN: llvm-nm -p %t.32 | FileCheck --check-prefixes=NM %s
 # RUN: llvm-objdump -d --no-show-raw-insn %t.32 | FileCheck --check-prefixes=LE %s
-# RUN: ld.lld -pie %t.32.o -o %t.32
+# RUN: ld.lld -pie --no-relax %t.32.o -o %t.32
 # RUN: llvm-objdump -d --no-show-raw-insn %t.32 | FileCheck --check-prefixes=LE %s
 
-# RUN: llvm-mc -filetype=obj -triple=riscv64 %s -o %t.64.o
-# RUN: ld.lld %t.64.o -o %t.64
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax %s -o %t.64.o
+# RUN: ld.lld --no-relax %t.64.o -o %t.64
 # RUN: llvm-objdump -d --no-show-raw-insn %t.64 | FileCheck --check-prefixes=LE %s
-# RUN: ld.lld -pie %t.64.o -o %t.64
+# RUN: ld.lld -pie --no-relax %t.64.o -o %t.64
 # RUN: llvm-objdump -d --no-show-raw-insn %t.64 | FileCheck --check-prefixes=LE %s
+# RUN: ld.lld %t.64.o -o %t.64.relax
+# RUN: llvm-objdump -d --no-show-raw-insn %t.64.relax | FileCheck --check-prefixes=LE-RELAX %s
 
 # RUN: not ld.lld -shared %t.32.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error:
 
 # ERR: error: relocation R_RISCV_TPREL_HI20 against .LANCHOR0 cannot be used with -shared
 # ERR: error: relocation R_RISCV_TPREL_LO12_I against .LANCHOR0 cannot be used with -shared
 # ERR: error: relocation R_RISCV_TPREL_HI20 against a cannot be used with -shared
+# ERR: error: relocation R_RISCV_TPREL_LO12_I against a cannot be used with -shared
+# ERR: error: relocation R_RISCV_TPREL_HI20 against a cannot be used with -shared
+# ERR: error: relocation R_RISCV_TPREL_LO12_S against a cannot be used with -shared
+# ERR: error: relocation R_RISCV_TPREL_HI20 against a cannot be used with -shared
 # ERR: error: relocation R_RISCV_TPREL_LO12_S against a cannot be used with -shared
 
 # NM: {{0*}}00000008 b .LANCHOR0
-# NM: {{0*}}0000000c B a
+# NM: {{0*}}00000800 B a
 
 ## .LANCHOR0 at tprel = 8
 ## a at tprel = 12
-# LE:      lui a5, 0
-# LE-NEXT: add a5, a5, tp
-# LE-NEXT: addi a5, a5, 8
-# LE-NEXT: lui a5, 0
-# LE-NEXT: add a5, a5, tp
-# LE-NEXT: sw a0, 12(a5)
+# LE:      lui a1, 0
+# LE-NEXT: add a1, a1, tp
+# LE-NEXT: addi a1, a1, 8
+# LE-NEXT: lui a2, 0
+# LE-NEXT: add a2, a2, tp
+# LE-NEXT: addi a2, a2, 2044
+# LE-NEXT: lui a3, 0
+# LE-NEXT: addi a0, a0, 1
+# LE-NEXT: add a3, a3, tp
+# LE-NEXT: addi a0, a0, 2
+# LE-NEXT: sw a0, 2044(a3)
+# LE-NEXT: lui a4, 1
+# LE-NEXT: add a4, a4, tp
+# LE-NEXT: sw a0, -2048(a4)
+# LE-EMPTY:
+
+# LE-RELAX:      <.text>:
+# LE-RELAX-NEXT:   addi a1, tp, 8
+# LE-RELAX-NEXT:   addi a2, tp, 2044
+# LE-RELAX-NEXT:   addi a0, a0, 1
+# LE-RELAX-NEXT:   addi a0, a0, 2
+# LE-RELAX-NEXT:   sw a0, 2044(tp)
+# LE-RELAX-NEXT:   lui a4, 1
+# LE-RELAX-NEXT:   add a4, a4, tp
+# LE-RELAX-NEXT:   sw a0, -2048(a4)
+# LE-RELAX-EMPTY:
 
-lui a5, %tprel_hi(.LANCHOR0)
-add a5, a5, tp, %tprel_add(.LANCHOR0)
-addi a5, a5, %tprel_lo(.LANCHOR0)
+lui a1, %tprel_hi(.LANCHOR0)
+add a1, a1, tp, %tprel_add(.LANCHOR0)
+addi a1, a1, %tprel_lo(.LANCHOR0)
 
-lui a5, %tprel_hi(a)
-add a5, a5, tp, %tprel_add(a)
-sw a0, %tprel_lo(a)(a5)
+## hi20(a-4) = hi20(0x7fc) = 0. relaxable
+lui a2, %tprel_hi(a-4)
+add a2, a2, tp, %tprel_add(a-4)
+addi a2, a2, %tprel_lo(a-4)
+
+## hi20(a-4) = hi20(0x7fc) = 0. relaxable
+## Test non-adjacent instructions.
+lui a3, %tprel_hi(a-4)
+addi a0, a0, 1
+add a3, a3, tp, %tprel_add(a-4)
+addi a0, a0, 2
+sw a0, %tprel_lo(a-4)(a3)
+
+## hi20(a) = hi20(0x800) = 1. not relaxable
+lui a4, %tprel_hi(a)
+add a4, a4, tp, %tprel_add(a)
+sw a0, %tprel_lo(a)(a4)
 
 .section .tbss
 .space 8
 .LANCHOR0:
-.zero 4
+.space 0x800-8
 .globl a
 a:
+.zero 4


        


More information about the llvm-commits mailing list