[lld] 7b6a89f - [ELF] Detect convergence of output section addresses
via llvm-commits
llvm-commits at lists.llvm.org
Fri May 31 09:31:19 PDT 2024
Author: Fangrui Song
Date: 2024-05-31T09:31:15-07:00
New Revision: 7b6a89f346f281e5b7caa593a8c484eaf4264055
URL: https://github.com/llvm/llvm-project/commit/7b6a89f346f281e5b7caa593a8c484eaf4264055
DIFF: https://github.com/llvm/llvm-project/commit/7b6a89f346f281e5b7caa593a8c484eaf4264055.diff
LOG: [ELF] Detect convergence of output section addresses
Some linker scripts don't converge. https://reviews.llvm.org/D66279
("[ELF] Make LinkerScript::assignAddresses iterative") added detection of
symbol assignments that fail to converge.
This patch additionally detects output section addresses that fail to
converge. Input section offsets might also fail to converge, but such cases
are less common, as expressions that could cause convergence issues typically
involve output sections and symbol assignments.
GNU ld reports the error `non constant or forward reference address expression for section`,
which correctly rejects the following linker script:
```
SECTIONS {
.text ADDR(.data)+0x1000 : { *(.text) }
.data : { *(.data) }
}
```
but not the following variant:
```
SECTIONS {
.text foo : { *(.text) }
.data : { *(.data) }
foo = ADDR(.data)+0x1000;
}
```
Our approach consistently rejects both cases.
Link: https://discourse.llvm.org/t/lld-and-layout-convergence/79232
Pull Request: https://github.com/llvm/llvm-project/pull/93888
Added:
lld/test/ELF/linkerscript/section-not-converge.test
Modified:
lld/ELF/LinkerScript.cpp
lld/ELF/LinkerScript.h
lld/ELF/Writer.cpp
lld/test/ELF/linkerscript/memory-err.s
Removed:
################################################################################
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index bfc13b658f5bf..68f5240ddc690 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -1025,13 +1025,14 @@ static OutputSection *findFirstSection(PhdrEntry *load) {
return nullptr;
}
-// This function assigns offsets to input sections and an output section
-// for a single sections command (e.g. ".text { *(.text); }").
-void LinkerScript::assignOffsets(OutputSection *sec) {
+// Assign addresses to an output section and offsets to its input sections and
+// symbol assignments. Return true if the output section's address has changed.
+bool LinkerScript::assignOffsets(OutputSection *sec) {
const bool isTbss = (sec->flags & SHF_TLS) && sec->type == SHT_NOBITS;
const bool sameMemRegion = state->memRegion == sec->memRegion;
const bool prevLMARegionIsDefault = state->lmaRegion == nullptr;
const uint64_t savedDot = dot;
+ bool addressChanged = false;
state->memRegion = sec->memRegion;
state->lmaRegion = sec->lmaRegion;
@@ -1068,6 +1069,7 @@ void LinkerScript::assignOffsets(OutputSection *sec) {
dot = alignToPowerOf2(dot, sec->addralign);
expandMemoryRegions(dot - pos);
}
+ addressChanged = sec->addr != dot;
sec->addr = dot;
// state->lmaOffset is LMA minus VMA. If LMA is explicitly specified via AT()
@@ -1151,6 +1153,7 @@ void LinkerScript::assignOffsets(OutputSection *sec) {
state->tbssAddr = dot;
dot = savedDot;
}
+ return addressChanged;
}
static bool isDiscardable(const OutputSection &sec) {
@@ -1387,9 +1390,10 @@ LinkerScript::AddressState::AddressState() {
// Here we assign addresses as instructed by linker script SECTIONS
// sub-commands. Doing that allows us to use final VA values, so here
// we also handle rest commands like symbol assignments and ASSERTs.
-// Returns a symbol that has changed its section or value, or nullptr if no
-// symbol has changed.
-const Defined *LinkerScript::assignAddresses() {
+// Return an output section that has changed its address or null, and a symbol
+// that has changed its section or value (or nullptr if no symbol has changed).
+std::pair<const OutputSection *, const Defined *>
+LinkerScript::assignAddresses() {
if (script->hasSectionsCommand) {
// With a linker script, assignment of addresses to headers is covered by
// allocateHeaders().
@@ -1402,6 +1406,7 @@ const Defined *LinkerScript::assignAddresses() {
dot += getHeaderSize();
}
+ OutputSection *changedOsec = nullptr;
AddressState st;
state = &st;
errorOnMissingSection = true;
@@ -1416,11 +1421,12 @@ const Defined *LinkerScript::assignAddresses() {
assign->size = dot - assign->addr;
continue;
}
- assignOffsets(&cast<OutputDesc>(cmd)->osec);
+ if (assignOffsets(&cast<OutputDesc>(cmd)->osec) && !changedOsec)
+ changedOsec = &cast<OutputDesc>(cmd)->osec;
}
state = nullptr;
- return getChangedSymbolAssignment(oldValues);
+ return {changedOsec, getChangedSymbolAssignment(oldValues)};
}
static bool hasRegionOverflowed(MemoryRegion *mr) {
diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h
index 734d4e7498aa2..36feab36e26ba 100644
--- a/lld/ELF/LinkerScript.h
+++ b/lld/ELF/LinkerScript.h
@@ -300,7 +300,7 @@ class LinkerScript final {
std::pair<MemoryRegion *, MemoryRegion *>
findMemoryRegion(OutputSection *sec, MemoryRegion *hint);
- void assignOffsets(OutputSection *sec);
+ bool assignOffsets(OutputSection *sec);
// This captures the local AddressState and makes it accessible
// deliberately. This is needed as there are some cases where we cannot just
@@ -334,7 +334,7 @@ class LinkerScript final {
bool needsInterpSection();
bool shouldKeep(InputSectionBase *s);
- const Defined *assignAddresses();
+ std::pair<const OutputSection *, const Defined *> assignAddresses();
bool spillSections();
void erasePotentialSpillSections();
void allocateHeaders(SmallVector<PhdrEntry *, 0> &phdrs);
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index c498153f3348b..c2ccc4f49ad2e 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1479,16 +1479,22 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() {
changed |= part.memtagGlobalDescriptors->updateAllocSize();
}
- const Defined *changedSym = script->assignAddresses();
+ std::pair<const OutputSection *, const Defined *> changes =
+ script->assignAddresses();
if (!changed) {
// Some symbols may be dependent on section addresses. When we break the
// loop, the symbol values are finalized because a previous
// assignAddresses() finalized section addresses.
- if (!changedSym)
+ if (!changes.first && !changes.second)
break;
if (++assignPasses == 5) {
- errorOrWarn("assignment to symbol " + toString(*changedSym) +
- " does not converge");
+ if (changes.first)
+ errorOrWarn("address (0x" + Twine::utohexstr(changes.first->addr) +
+ ") of section '" + changes.first->name +
+ "' does not converge");
+ if (changes.second)
+ errorOrWarn("assignment to symbol " + toString(*changes.second) +
+ " does not converge");
break;
}
} else if (spilled) {
diff --git a/lld/test/ELF/linkerscript/memory-err.s b/lld/test/ELF/linkerscript/memory-err.s
index 98e71e79f17d8..5ec190a415b29 100644
--- a/lld/test/ELF/linkerscript/memory-err.s
+++ b/lld/test/ELF/linkerscript/memory-err.s
@@ -68,8 +68,8 @@
# RUN: symbol = .; \
# RUN: .data : { *(.data) } > ram \
# RUN: }' > %t.script
-# RUN: not ld.lld -T %t.script %t.o -o /dev/null 2>&1 | FileCheck --check-prefix=ERR_OVERFLOW %s
-# ERR_OVERFLOW: error: section '.data' will not fit in region 'ram': overflowed by 2 bytes
+# RUN: not ld.lld -T %t.script %t.o -o /dev/null 2>&1 | FileCheck --check-prefix=NOT_CONVERGE %s
+# NOT_CONVERGE: error: address (0x14) of section '.text' does not converge
nop
diff --git a/lld/test/ELF/linkerscript/section-not-converge.test b/lld/test/ELF/linkerscript/section-not-converge.test
new file mode 100644
index 0000000000000..99e9eeb4f2d7a
--- /dev/null
+++ b/lld/test/ELF/linkerscript/section-not-converge.test
@@ -0,0 +1,37 @@
+# REQUIRES: x86
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64 a.s -o a.o
+
+# RUN: not ld.lld a.o -T a.lds 2>&1 | FileCheck %s --implicit-check-not=error:
+# CHECK: error: address (0x6014) of section '.text' does not converge
+
+# RUN: ld.lld a.o -T b.lds --noinhibit-exec 2>&1 | FileCheck %s --check-prefix=CHECK2 --implicit-check-not=warning:
+# CHECK2: warning: address (0x5014) of section '.text' does not converge
+# CHECK2: warning: assignment to symbol a does not converge
+
+#--- a.s
+.globl _start
+_start: .space 4
+.data; .byte 0
+
+#--- a.lds
+SECTIONS {
+ . = 0x1000;
+ .text ADDR(.data) + 0x1000 : { *(.text) }
+ .data : { *(.data) }
+}
+
+#--- b.lds
+SECTIONS {
+ . = 0x1000;
+ .text text : { *(.text) }
+ .data : {
+ *(.data)
+ x = ADDR(.text);
+ a = b;
+ b = c;
+ ## Absolute symbol; not converging
+ c = ABSOLUTE(ADDR(.text));
+ }
+ text = ADDR(.data) + 0x1000;
+}
More information about the llvm-commits
mailing list