[lld] [lld][ELF] Support relax R_LARCH_{ALIGN, PCALA_{HI20,LO12}, GOT_PC_{HI20,LO12}} (PR #78692)
Jinyang He via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 19 03:08:12 PST 2024
https://github.com/MQ-mengqing created https://github.com/llvm/llvm-project/pull/78692
Refer to commit 6611d58f5bbc ("Relax R_RISCV_ALIGN"), we can relax R_LARCH_ALIGN by same way. Reuse `SymbolAnchor`, `RISCVRelaxAux` and `initSymbolAnchors` to simplify codes. As `riscvFinalizeRelax` is an arch-specific function, put it override on `TargetInfo::finalizeRelax`, so that LoongArch can override it, too.
1, The flow of relax R_LARCH_ALIGN is almost consistent with RISCV. The difference is that LoongArch only has 4-bytes NOP and all executable insn is 4-bytes aligned. So LoongArch not need rewrite NOP sequence. Alignment maxBytesEmit parameter is supported in psABI v2.30. For reliability, only one relax pass is supported in this patch.
2, Relax R_LARCH_GOT_PC_{HI20,LO12} to R_LARCH_PCALA_{HI20,LO12} if the sym is a local got. And it can perform the next relaxation rule.
3, Relax R_LARCH_PCALA_{HI20,LO12} to R_LARCH_PCREL20_S2 if the dest is 4-bytes aligned and (dest - pc) is a 22bits-simm.
>From f5a0c829d20113bf7f24398a4a1e1e0ba5d2d829 Mon Sep 17 00:00:00 2001
From: Jinyang He <hejinyang at loongson.cn>
Date: Fri, 19 Jan 2024 18:31:39 +0800
Subject: [PATCH] [lld][ELF] Support relax R_LARCH_{ALIGN, PCALA_{HI20,LO12},
GOT_PC_{HI20,LO12}}
Refer to commit 6611d58f5bbc ("Relax R_RISCV_ALIGN"), we can relax
R_LARCH_ALIGN by same way. Reuse `SymbolAnchor`, `RISCVRelaxAux` and
`initSymbolAnchors` to simplify codes. As `riscvFinalizeRelax` is an
arch-specific function, put it override on `TargetInfo::finalizeRelax`,
so that LoongArch can override it, too.
1, The flow of relax R_LARCH_ALIGN is almost consistent with RISCV. The
difference is that LoongArch only has 4-bytes NOP and all executable
insn is 4-bytes aligned. So LoongArch not need rewrite NOP sequence.
Alignment maxBytesEmit parameter is supported in psABI v2.30. For
reliability, only one relax pass is supported in this patch.
2, Relax R_LARCH_GOT_PC_{HI20,LO12} to R_LARCH_PCALA_{HI20,LO12} if the
sym is a local got. And it can perform the next relaxation rule.
3, Relax R_LARCH_PCALA_{HI20,LO12} to R_LARCH_PCREL20_S2 if the dest
is 4-bytes aligned and (dest - pc) is a 22bits-simm.
---
lld/ELF/Arch/LoongArch.cpp | 255 +++++++++++++++++++++++++-
lld/ELF/Arch/RISCV.cpp | 29 +--
lld/ELF/InputSection.cpp | 7 +-
lld/ELF/InputSection.h | 24 ++-
lld/ELF/Target.h | 3 +
lld/ELF/Writer.cpp | 4 +-
lld/test/ELF/loongarch-relax-align.s | 137 ++++++++++++++
lld/test/ELF/loongarch-relax-pcalas.s | 58 ++++++
8 files changed, 482 insertions(+), 35 deletions(-)
create mode 100644 lld/test/ELF/loongarch-relax-align.s
create mode 100644 lld/test/ELF/loongarch-relax-pcalas.s
diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index ab2ec5b447d000..235bd0f783d8e8 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -36,6 +36,8 @@ class LoongArch final : public TargetInfo {
bool usesOnlyLowPageBits(RelType type) const override;
void relocate(uint8_t *loc, const Relocation &rel,
uint64_t val) const override;
+ bool relaxOnce(int pass) const override;
+ void finalizeRelax(int passes) const override;
};
} // end anonymous namespace
@@ -48,6 +50,8 @@ enum Op {
ADDI_W = 0x02800000,
ADDI_D = 0x02c00000,
ANDI = 0x03400000,
+ PCADDI = 0x18000000,
+ PCALAU12I = 0x1a000000,
PCADDU12I = 0x1c000000,
LD_W = 0x28800000,
LD_D = 0x28c00000,
@@ -465,8 +469,9 @@ RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s,
case R_LARCH_TLS_GD_HI20:
return R_TLSGD_GOT;
case R_LARCH_RELAX:
- // LoongArch linker relaxation is not implemented yet.
- return R_NONE;
+ return config->relax ? R_RELAX_HINT : R_NONE;
+ case R_LARCH_ALIGN:
+ return R_RELAX_HINT;
// Other known relocs that are explicitly unimplemented:
//
@@ -659,6 +664,252 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel,
}
}
+// Relax R_LARCH_PCALA_{HI20,LO12}
+// pcalau12i+addi.{d,w} to pcaddi
+static void relaxPcalaAddi(const InputSection &sec, size_t i, uint64_t loc,
+ Relocation &r, Relocation &rl, uint32_t &remove) {
+ const Symbol &sym = *r.sym;
+ const uint32_t pca = read64le(sec.content().data() + r.offset);
+ const uint32_t adi = read64le(sec.content().data() + rl.offset);
+ const uint32_t rd = pca & 0x1f;
+ const uint32_t op_addi = config->is64 ? ADDI_D : ADDI_W;
+ const uint64_t dest = sym.getVA() + r.addend;
+ const int64_t displace = dest - loc;
+
+ if (!(dest & 0x3) && isInt<22>(displace) && rd == (adi & 0x1f) &&
+ rd == ((adi >> 5) & 0x1f) && rl.offset == r.offset + 4 &&
+ (adi & 0xffc00000) == op_addi) {
+ sec.relaxAux->relocTypes[i] = R_LARCH_PCREL20_S2;
+ sec.relaxAux->relocTypes[i + 2] = R_LARCH_RELAX;
+ sec.relaxAux->writes.push_back(PCADDI | rd);
+ remove = 4;
+ }
+}
+
+// Relax R_LARCH_GOT_PC_{HI20,LO12}
+// pcalau12i+ld.{d,w} to pcalau12i+addi.{d,w} or pcaddi
+static void relaxPcalaLd(const InputSection &sec, size_t i, uint64_t loc,
+ Relocation &r, Relocation &rl, uint32_t &remove) {
+ const Symbol &sym = *r.sym;
+ const uint32_t pca = read64le(sec.content().data() + r.offset);
+ const uint32_t ld = read64le(sec.content().data() + rl.offset);
+ const uint32_t rd = pca & 0x1f;
+ const uint32_t op_addi = config->is64 ? ADDI_D : ADDI_W;
+ const uint32_t op_ld = config->is64 ? LD_D : LD_W;
+ const uint64_t dest = sym.getVA() + r.addend;
+ const int64_t displace = dest - loc;
+
+ if (!sym.isLocal())
+ return;
+
+ if (rd != (ld & 0x1f) || rd != ((ld >> 5) & 0x1f) ||
+ rl.offset != r.offset + 4 || (ld & 0xffc00000) != op_ld)
+ return;
+
+ if (!(dest & 0x3) && isInt<22>(displace)) {
+ sec.relaxAux->relocTypes[i] = R_LARCH_PCREL20_S2;
+ sec.relaxAux->relocTypes[i + 2] = R_LARCH_RELAX;
+ sec.relaxAux->writes.push_back(PCADDI | rd);
+ remove = 4;
+ } else {
+ sec.relaxAux->relocTypes[i] = R_LARCH_PCALA_HI20;
+ sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCALA_LO12;
+ sec.relaxAux->writes.push_back(PCALAU12I | rd);
+ sec.relaxAux->writes.push_back(op_addi | (rd << 5) | rd);
+ }
+}
+
+static bool relax(InputSection &sec) {
+ const uint64_t secAddr = sec.getVA();
+ auto &aux = *sec.relaxAux;
+ bool changed = false;
+ ArrayRef<SymbolAnchor> sa = ArrayRef(aux.anchors);
+ uint64_t delta = 0;
+
+ std::fill_n(aux.relocTypes.get(), sec.relocs().size(), R_LARCH_NONE);
+ aux.writes.clear();
+ for (auto [i, r] : llvm::enumerate(sec.relocs())) {
+ const uint64_t loc = secAddr + r.offset - delta;
+ uint32_t &cur = aux.relocDeltas[i], remove = 0;
+ switch (r.type) {
+ case R_LARCH_ALIGN: {
+ const uint64_t addend = r.sym->isUndefined() ? Log2_64(r.addend) + 1 : r.addend;
+ const uint64_t allBytes = (1 << (addend & 0xff)) - 4;
+ const uint64_t align = 1 << (addend & 0xff);
+ const uint64_t maxBytes = addend >> 8;
+ const uint64_t off = loc & (align - 1);
+ const uint64_t curBytes = off == 0 ? 0 : align - off;
+ // All bytes beyond the alignment boundary should be removed.
+ // If emit bytes more than max bytes to emit, remove all.
+ if (maxBytes != 0 && curBytes > maxBytes)
+ remove = allBytes;
+ else
+ remove = allBytes - curBytes;
+ assert(static_cast<int32_t>(remove) >= 0 &&
+ "R_LARCH_ALIGN needs expanding the content");
+ break;
+ }
+ case R_LARCH_PCALA_HI20:
+ if (i + 3 < sec.relocs().size() &&
+ sec.relocs()[i + 1].type == R_LARCH_RELAX &&
+ sec.relocs()[i + 2].type == R_LARCH_PCALA_LO12 &&
+ sec.relocs()[i + 3].type == R_LARCH_RELAX)
+ relaxPcalaAddi(sec, i, loc, r, sec.relocs()[i + 2], remove);
+ break;
+ case R_LARCH_GOT_PC_HI20:
+ if (i + 3 < sec.relocs().size() &&
+ sec.relocs()[i + 1].type == R_LARCH_RELAX &&
+ sec.relocs()[i + 2].type == R_LARCH_GOT_PC_LO12 &&
+ sec.relocs()[i + 3].type == R_LARCH_RELAX)
+ relaxPcalaLd(sec, i, loc, r, sec.relocs()[i + 2], remove);
+ break;
+ }
+
+ // For all anchors whose offsets are <= r.offset, they are preceded by
+ // the previous relocation whose `relocDeltas` value equals `delta`.
+ // Decrease their st_value and update their st_size.
+ for (; sa.size() && sa[0].offset <= r.offset; sa = sa.slice(1)) {
+ if (sa[0].end)
+ sa[0].d->size = sa[0].offset - delta - sa[0].d->value;
+ else
+ sa[0].d->value = sa[0].offset - delta;
+ }
+ delta += remove;
+ if (delta != cur) {
+ cur = delta;
+ changed = true;
+ }
+ }
+
+ for (const SymbolAnchor &a : sa) {
+ if (a.end)
+ a.d->size = a.offset - delta - a.d->value;
+ else
+ a.d->value = a.offset - delta;
+ }
+ // Inform assignAddresses that the size has changed.
+ if (!isUInt<32>(delta))
+ fatal("section size decrease is too large: " + Twine(delta));
+ sec.bytesDropped = delta;
+ return changed;
+}
+
+// When relaxing just R_LARCH_ALIGN, relocDeltas is usually changed only once in
+// the absence of a linker script. For call and load/store R_LARCH_RELAX, code
+// shrinkage may reduce displacement and make more relocations eligible for
+// relaxation. Code shrinkage may increase displacement to a call/load/store
+// target at a higher fixed address, invalidating an earlier relaxation. Any
+// change in section sizes can have cascading effect and require another
+// relaxation pass.
+bool LoongArch::relaxOnce(int pass) const {
+ if (config->relocatable)
+ return false;
+
+ // TODO Relax multiple times.
+ if (pass > 0)
+ return false;
+
+ initSymbolAnchors();
+ SmallVector<InputSection *, 0> storage;
+ bool changed = false;
+ for (OutputSection *osec : outputSections) {
+ if (!(osec->flags & SHF_EXECINSTR))
+ continue;
+ for (InputSection *sec : getInputSections(*osec, storage))
+ changed |= relax(*sec);
+ }
+ return changed;
+}
+
+void LoongArch::finalizeRelax(int passes) const {
+ SmallVector<InputSection *, 0> storage;
+ for (OutputSection *osec : outputSections) {
+ if (!(osec->flags & SHF_EXECINSTR))
+ continue;
+ for (InputSection *sec : getInputSections(*osec, storage)) {
+ RelaxAux &aux = *sec->relaxAux;
+ if (!aux.relocDeltas)
+ continue;
+
+ MutableArrayRef<Relocation> rels = sec->relocs();
+ ArrayRef<uint8_t> old = sec->content();
+ size_t newSize = old.size() - aux.relocDeltas[rels.size() - 1];
+ size_t writesIdx = 0;
+ uint8_t *p = context().bAlloc.Allocate<uint8_t>(newSize);
+ uint64_t offset = 0;
+ int64_t delta = 0;
+ sec->content_ = p;
+ sec->size = newSize;
+ sec->bytesDropped = 0;
+
+ // Update section content: remove NOPs for R_LARCH_ALIGN and rewrite
+ // instructions for relaxed relocations.
+ for (size_t i = 0, e = rels.size(); i != e; ++i) {
+ uint32_t remove = aux.relocDeltas[i] - delta;
+ delta = aux.relocDeltas[i];
+ if (remove == 0 && aux.relocTypes[i] == R_LARCH_NONE)
+ continue;
+
+ // Copy from last location to the current relocated location.
+ const Relocation &r = rels[i];
+ uint64_t size = r.offset - offset;
+
+ // The rel.type may be fixed to R_LARCH_RELAX and the instruction
+ // will be deleted, then cause size<0
+ if (remove == 0 && (int64_t)size < 0) {
+ assert(aux.relocTypes[i] == R_LARCH_RELAX && "Unexpected size");
+ continue;
+ }
+
+ memcpy(p, old.data() + offset, size);
+ p += size;
+
+ int64_t skip = 0;
+ if (RelType newType = aux.relocTypes[i]) {
+ switch (newType) {
+ case R_LARCH_RELAX:
+ // Indicate the relocation is ignored.
+ break;
+ case R_LARCH_PCREL20_S2:
+ case R_LARCH_PCALA_HI20:
+ case R_LARCH_PCALA_LO12:
+ skip = 4;
+ write32le(p, aux.writes[writesIdx++]);
+ break;
+ default:
+ llvm_unreachable("unsupported type");
+ }
+ }
+
+ p += skip;
+ offset = r.offset + skip + remove;
+ }
+ memcpy(p, old.data() + offset, old.size() - offset);
+
+ // Subtract the previous relocDeltas value from the relocation offset.
+ // For a pair of R_LARCH_XXX/R_LARCH_RELAX with the same offset, decrease
+ // their r_offset by the same delta.
+ delta = 0;
+ for (size_t i = 0, e = rels.size(); i != e;) {
+ uint64_t cur = rels[i].offset;
+ do {
+ rels[i].offset -= delta;
+ // Fix expr so that getRelocTargetVA will get update value.
+ if (aux.relocTypes[i] == R_LARCH_PCREL20_S2)
+ rels[i].expr = R_PC;
+ else if (aux.relocTypes[i] == R_LARCH_PCALA_HI20)
+ rels[i].expr = R_LOONGARCH_PAGE_PC;
+ else if (aux.relocTypes[i] == R_LARCH_PCALA_LO12)
+ rels[i].expr = R_ABS;
+ if (aux.relocTypes[i] != R_LARCH_NONE)
+ rels[i].type = aux.relocTypes[i];
+ } while (++i != e && rels[i].offset == cur);
+ delta = aux.relocDeltas[i - 1];
+ }
+ }
+ }
+}
+
TargetInfo *elf::getLoongArchTargetInfo() {
static LoongArch target;
return ⌖
diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
index d7d3d3e4781497..4536236ff85e89 100644
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -45,6 +45,7 @@ class RISCV final : public TargetInfo {
uint64_t val) const override;
void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
bool relaxOnce(int pass) const override;
+ void finalizeRelax(int passes) const override;
};
} // end anonymous namespace
@@ -560,33 +561,13 @@ void RISCV::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
}
}
-namespace {
-struct SymbolAnchor {
- uint64_t offset;
- Defined *d;
- bool end; // true for the anchor of st_value+st_size
-};
-} // namespace
-
-struct elf::RISCVRelaxAux {
- // This records symbol start and end offsets which will be adjusted according
- // to the nearest relocDeltas element.
- SmallVector<SymbolAnchor, 0> anchors;
- // For relocations[i], the actual offset is r_offset - (i ? relocDeltas[i-1] :
- // 0).
- std::unique_ptr<uint32_t[]> relocDeltas;
- // For relocations[i], the actual type is relocTypes[i].
- std::unique_ptr<RelType[]> relocTypes;
- SmallVector<uint32_t, 0> writes;
-};
-
-static void initSymbolAnchors() {
+void elf::initSymbolAnchors() {
SmallVector<InputSection *, 0> storage;
for (OutputSection *osec : outputSections) {
if (!(osec->flags & SHF_EXECINSTR))
continue;
for (InputSection *sec : getInputSections(*osec, storage)) {
- sec->relaxAux = make<RISCVRelaxAux>();
+ sec->relaxAux = make<RelaxAux>();
if (sec->relocs().size()) {
sec->relaxAux->relocDeltas =
std::make_unique<uint32_t[]>(sec->relocs().size());
@@ -819,7 +800,7 @@ bool RISCV::relaxOnce(int pass) const {
return changed;
}
-void elf::riscvFinalizeRelax(int passes) {
+void RISCV::finalizeRelax(int passes) const {
llvm::TimeTraceScope timeScope("Finalize RISC-V relaxation");
log("relaxation passes: " + Twine(passes));
SmallVector<InputSection *, 0> storage;
@@ -827,7 +808,7 @@ void elf::riscvFinalizeRelax(int passes) {
if (!(osec->flags & SHF_EXECINSTR))
continue;
for (InputSection *sec : getInputSections(*osec, storage)) {
- RISCVRelaxAux &aux = *sec->relaxAux;
+ RelaxAux &aux = *sec->relaxAux;
if (!aux.relocDeltas)
continue;
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 586404643cc1fd..80421f124ecba7 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -352,9 +352,10 @@ InputSectionBase *InputSection::getRelocatedSection() const {
template <class ELFT, class RelTy>
void InputSection::copyRelocations(uint8_t *buf) {
- if (config->relax && !config->relocatable && config->emachine == EM_RISCV) {
- // On RISC-V, relaxation might change relocations: copy from internal ones
- // that are updated by relaxation.
+ if (config->relax && !config->relocatable &&
+ (config->emachine == EM_RISCV || config->emachine == EM_LOONGARCH)) {
+ // On LoongArch and RISC-V, relaxation might change relocations: copy
+ // from internal ones that are updated by relaxation.
InputSectionBase *sec = getRelocatedSection();
copyRelocations<ELFT, RelTy>(buf, llvm::make_range(sec->relocations.begin(),
sec->relocations.end()));
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
index fbaea57bd586b0..9fbe85fce1f24c 100644
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -101,7 +101,23 @@ class SectionBase {
link(link), info(info) {}
};
-struct RISCVRelaxAux;
+struct SymbolAnchor {
+ uint64_t offset;
+ Defined *d;
+ bool end; // true for the anchor of st_value+st_size
+};
+
+struct RelaxAux {
+ // This records symbol start and end offsets which will be adjusted according
+ // to the nearest relocDeltas element.
+ SmallVector<SymbolAnchor, 0> anchors;
+ // For relocations[i], the actual offset is r_offset - (i ? relocDeltas[i-1] :
+ // 0).
+ std::unique_ptr<uint32_t[]> relocDeltas;
+ // For relocations[i], the actual type is relocTypes[i].
+ std::unique_ptr<RelType[]> relocTypes;
+ SmallVector<uint32_t, 0> writes;
+};
// This corresponds to a section of an input file.
class InputSectionBase : public SectionBase {
@@ -225,9 +241,9 @@ class InputSectionBase : public SectionBase {
// basic blocks.
JumpInstrMod *jumpInstrMod = nullptr;
- // Auxiliary information for RISC-V linker relaxation. RISC-V does not use
- // jumpInstrMod.
- RISCVRelaxAux *relaxAux;
+ // Auxiliary information for RISC-V and LoongArch linker relaxation.
+ // RISC-V and LoongArch does not use jumpInstrMod.
+ RelaxAux *relaxAux;
// The compressed content size when `compressed` is true.
size_t compressedSize;
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
index ab6b6b9c013ba3..ed00e81c0e6c81 100644
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -95,6 +95,8 @@ class TargetInfo {
// Do a linker relaxation pass and return true if we changed something.
virtual bool relaxOnce(int pass) const { return false; }
+ // Do finalize relaxation after collecting relaxation infos.
+ virtual void finalizeRelax(int passes) const {}
virtual void applyJumpInstrMod(uint8_t *loc, JumpModType type,
JumpModType val) const {}
@@ -236,6 +238,7 @@ void addArmSyntheticSectionMappingSymbol(Defined *);
void sortArmMappingSymbols();
void convertArmInstructionstoBE8(InputSection *sec, uint8_t *buf);
void createTaggedSymbols(const SmallVector<ELFFileBase *, 0> &files);
+void initSymbolAnchors();
LLVM_LIBRARY_VISIBILITY extern const TargetInfo *target;
TargetInfo *getTarget();
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index dfec5e07301a74..e2d83f0122b0b8 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1750,8 +1750,8 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() {
}
}
}
- if (!config->relocatable && config->emachine == EM_RISCV)
- riscvFinalizeRelax(pass);
+ if (!config->relocatable)
+ target->finalizeRelax(pass);
if (config->relocatable)
for (OutputSection *sec : outputSections)
diff --git a/lld/test/ELF/loongarch-relax-align.s b/lld/test/ELF/loongarch-relax-align.s
new file mode 100644
index 00000000000000..19588b5b21cb98
--- /dev/null
+++ b/lld/test/ELF/loongarch-relax-align.s
@@ -0,0 +1,137 @@
+# REQUIRES: loongarch
+
+# RUN: llvm-mc --filetype=obj --triple=loongarch32 --mattr=+relax %s -o %t.32.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.64.o
+# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.32.o -o %t.32
+# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.64.o -o %t.64
+# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.32.o --no-relax -o %t.32n
+# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.64.o --no-relax -o %t.64n
+# RUN: llvm-objdump -td --no-show-raw-insn %t.32 | FileCheck %s --check-prefixes=CHECK,RELAX
+# RUN: llvm-objdump -td --no-show-raw-insn %t.64 | FileCheck %s --check-prefixes=CHECK,RELAX
+# RUN: llvm-objdump -td --no-show-raw-insn %t.32n | FileCheck %s --check-prefixes=CHECK,NOREL
+# RUN: llvm-objdump -td --no-show-raw-insn %t.64n | FileCheck %s --check-prefixes=CHECK,NOREL
+
+## Test the R_LARCH_ALIGN without symbol index.
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.o64.o --defsym=old=1
+# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.o64.o -o %t.o64
+# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.o64.o --no-relax -o %t.o64n
+# RUN: llvm-objdump -td --no-show-raw-insn %t.o64 | FileCheck %s --check-prefixes=CHECK,RELAX
+# RUN: llvm-objdump -td --no-show-raw-insn %t.o64n | FileCheck %s --check-prefixes=CHECK,NOREL
+
+## -r keeps section contents unchanged.
+# RUN: ld.lld -r %t.64.o -o %t.64.r
+# RUN: llvm-objdump -dr --no-show-raw-insn %t.64.r | FileCheck %s --check-prefix=CHECKR
+
+# RELAX-DAG: {{0*}}10000 l .text {{0*}}34 .Ltext_start
+# RELAX-DAG: {{0*}}1002c l .text {{0*}}08 .L1
+# RELAX-DAG: {{0*}}10030 l .text {{0*}}04 .L2
+# RELAX-DAG: {{0*}}20000 l .text2 {{0*}}14 .Ltext2_start
+# NOREL-DAG: {{0*}}10000 l .text {{0*}}44 .Ltext_start
+# NOREL-DAG: {{0*}}10038 l .text {{0*}}0c .L1
+# NOREL-DAG: {{0*}}10040 l .text {{0*}}04 .L2
+# NOREL-DAG: {{0*}}20000 l .text2 {{0*}}14 .Ltext2_start
+
+# CHECK: <.Ltext_start>:
+# CHECK-NEXT: 10000: break 1
+# CHECK-NEXT: 10004: break 2
+# CHECK-NEXT: 10008: nop
+# CHECK-NEXT: 1000c: nop
+# CHECK-NEXT: 10010: break 3
+# CHECK-NEXT: 10014: break 4
+# CHECK-NEXT: 10018: nop
+# CHECK-NEXT: 1001c: nop
+# RELAX-NEXT: 10020: pcaddi $zero, -8
+# RELAX-NEXT: 10024: pcaddi $zero, 2
+# RELAX-NEXT: 10028: pcaddi $zero, 2
+# NOREL-NEXT: 10020: pcalau12i $zero, 0
+# NOREL-NEXT: 10024: addi.{{[dw]}} $zero, $zero, 0
+# NOREL-NEXT: 10028: pcalau12i $zero, 0
+# NOREL-NEXT: 1002c: addi.{{[dw]}} $zero, $zero, 56
+# NOREL-NEXT: 10030: pcalau12i $zero, 0
+# NOREL-NEXT: 10034: addi.{{[dw]}} $zero, $zero, 64
+# CHECK-EMPTY:
+# CHECK-NEXT: <.L1>:
+# RELAX-NEXT: 1002c: nop
+# NOREL-NEXT: 10038: nop
+# NOREL-NEXT: 1003c: nop
+# CHECK-EMPTY:
+# CHECK-NEXT: <.L2>:
+# CHECK-NEXT: break 5
+
+# CHECK: <.Ltext2_start>:
+# RELAX-NEXT: 20000: pcaddi $zero, 0
+# RELAX-NEXT: 20004: nop
+# NOREL-NEXT: 20000: pcalau12i $zero, 0
+# NOREL-NEXT: 20004: addi.{{[dw]}} $zero, $zero, 0
+# CHECK-NEXT: 20008: nop
+# CHECK-NEXT: 2000c: nop
+# CHECK-NEXT: 20010: break 6
+
+# CHECKR: <.Ltext2_start>:
+# CHECKR-NEXT: 0: pcalau12i $zero, 0
+# CHECKR-NEXT: {{0*}}00: R_LARCH_PCALA_HI20 .Ltext2_start
+# CHECKR-NEXT: {{0*}}00: R_LARCH_RELAX *ABS*
+# CHECKR-NEXT: 4: addi.d $zero, $zero, 0
+# CHECKR-NEXT: {{0*}}04: R_LARCH_PCALA_LO12 .Ltext2_start
+# CHECKR-NEXT: {{0*}}04: R_LARCH_RELAX *ABS*
+# CHECKR-NEXT: 8: nop
+# CHECKR-NEXT: {{0*}}08: R_LARCH_ALIGN .Lalign_symbol+0x4
+# CHECKR-NEXT: c: nop
+# CHECKR-NEXT: 10: nop
+# CHECKR-NEXT: 14: break 6
+
+.macro .fake_p2align_4 max=0
+ .ifdef old
+ .ifeq \max!=0
+ .reloc ., R_LARCH_ALIGN, 0xc
+ nop; nop; nop
+ .endif
+ .else
+ .reloc ., R_LARCH_ALIGN, .Lalign_symbol + 0x4 + (\max << 8)
+ nop; nop; nop
+ .endif
+.endm
+
+ .text
+.Lalign_symbol:
+.Ltext_start:
+ break 1
+ break 2
+## +0x8: Emit 2 nops, delete 1 nop.
+ .fake_p2align_4
+
+ break 3
+## +0x14: Emit 3 nops > 8 bytes, not emit.
+ .fake_p2align_4 8
+
+ break 4
+ .fake_p2align_4 8
+## +0x18: Emit 2 nops <= 8 bytes.
+
+## Compensate
+.ifdef old
+ nop; nop
+.endif
+
+## +0x20: Test symbol value and symbol size can be handled.
+ la.pcrel $r0, .Ltext_start
+ la.pcrel $r0, .L1
+ la.pcrel $r0, .L2
+
+## +0x2c --relax: Emit 1 nop, delete 2 nops.
+## +0x38 --no-relax: Emit 2 nops, delete 1 nop.
+.L1:
+ .fake_p2align_4
+.L2:
+ break 5
+ .size .L1, . - .L1
+ .size .L2, . - .L2
+ .size .Ltext_start, . - .Ltext_start
+
+## Test another text section.
+ .section .text2,"ax", at progbits
+.Ltext2_start:
+ la.pcrel $r0, .Ltext2_start
+ .fake_p2align_4
+ break 6
+ .size .Ltext2_start, . - .Ltext2_start
diff --git a/lld/test/ELF/loongarch-relax-pcalas.s b/lld/test/ELF/loongarch-relax-pcalas.s
new file mode 100644
index 00000000000000..0b2f847bc1b91c
--- /dev/null
+++ b/lld/test/ELF/loongarch-relax-pcalas.s
@@ -0,0 +1,58 @@
+# REQUIRES: loongarch
+
+# RUN: llvm-mc --filetype=obj --triple=loongarch32 --mattr=+relax %s -o %t.32.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.64.o
+# RUN: ld.lld %t.32.o --section-start=.text=0x1000000 --section-start=.data=0x1200000 -o %t.32
+# RUN: ld.lld %t.64.o --section-start=.text=0x1000000 --section-start=.data=0x1200000 -o %t.64
+# RUN: llvm-objdump -d --no-show-raw-insn %t.32 | FileCheck %s --check-prefixes=CHECKNN,CHECK32
+# RUN: llvm-objdump -d --no-show-raw-insn %t.64 | FileCheck %s --check-prefixes=CHECKNN,CHECK64
+
+ .text
+## Start at 0x1000000
+ .global _start
+_start:
+# CHECKNN: 1000000 <_start>:
+
+ la.pcrel $a0, ptr_1 # Not relaxed due to out of 22bits-simm range
+# CHECKNN-NEXT: 1000000: pcalau12i $a0, 512
+# CHECK32-NEXT: 1000004: addi.w $a0, $a0, 0
+# CHECK64-NEXT: 1000004: addi.d $a0, $a0, 0
+
+ la.pcrel $a0, ptr_1 # Should be relaxed to pcaddi
+# CHECKNN-NEXT: 1000008: pcaddi $a0, 524286
+
+ la.got $a0, ptr_1 # Should be relaxed to pcaddi
+# CHECKNN-NEXT: 100000c: pcaddi $a0, 524285
+
+ la.pcrel $a0, ptr_2 # Not relaxed due to not aligned
+# CHECKNN-NEXT: 1000010: pcalau12i $a0, 512
+# CHECK32-NEXT: 1000014: addi.w $a0, $a0, 1
+# CHECK64-NEXT: 1000014: addi.d $a0, $a0, 1
+
+ la.got $a0, ptr_2 # Should be relaxed to pcalau12i+addi
+# CHECKNN-NEXT: 1000018: pcalau12i $a0, 512
+# CHECK32-NEXT: 100001c: addi.w $a0, $a0, 1
+# CHECK64-NEXT: 100001c: addi.d $a0, $a0, 1
+
+ la.got $a0, ptr_3 # Not relaxed
+# CHECKNN-NEXT: 1000020: pcalau12i $a0, 512
+# CHECK32-NEXT: 1000024: ld.w $a0, $a0, 4
+# CHECK64-NEXT: 1000024: ld.d $a0, $a0, 8
+
+ la.got $a0, ptr_4 # Not relaxed
+# CHECKNN-NEXT: 1000028: pcalau12i $a0, 512
+# CHECK32-NEXT: 100002c: ld.w $a0, $a0, 8
+# CHECK64-NEXT: 100002c: ld.d $a0, $a0, 16
+
+ ret
+# CHECKNN-NEXT: 1000030: ret
+
+ .data
+## Start at 0x1200000
+ .local ptr_1, ptr_2
+ .global ptr_3
+ .weak ptr_4
+ptr_1: .byte 0
+ptr_2: .byte 0
+ptr_3: .byte 0
+ptr_4: .byte 0
More information about the llvm-commits
mailing list