[lld] [LLD][COFF] add __{data,bss}_{start,end}__ symbols for Cygwin support (PR #136180)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 23 14:53:27 PDT 2025
https://github.com/jeremyd2019 updated https://github.com/llvm/llvm-project/pull/136180
From 05747de8c8ee350edf90f8de3560ccd82de61649 Mon Sep 17 00:00:00 2001
From: Jeremy Drake <github at jdrake.com>
Date: Sun, 13 Apr 2025 22:41:05 -0700
Subject: [PATCH 1/2] [LLD][COFF] add __{data,bss}_{start,end}__ symbols for
Cygwin support
Cygwin requires these symbols for its fork emulation to know what data
to copy into the child. GNU ld defines these symbols for MinGW targets
also, so do the same here.
Cygwin also has the .data_cygwin_nocopy section, which is merged into
.data outside the __data_start__ to __data_end__ range. This excludes
it from fork's copying. AFAIK it's only used by the Cygwin DLL itself
(linking that DLL requires a custom linker script, which LLD does not
support), but the section is included in GNU ld's default linker script,
so handle it here too.
Signed-off-by: Jeremy Drake <github at jdrake.com>
---
lld/COFF/Driver.cpp | 5 ++
lld/COFF/Writer.cpp | 33 ++++++++++-
lld/test/COFF/cygwin-symbols.s | 100 +++++++++++++++++++++++++++++++++
3 files changed, 136 insertions(+), 2 deletions(-)
create mode 100644 lld/test/COFF/cygwin-symbols.s
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 7aa13bdce488e..72e25634c19d8 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -2039,6 +2039,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
parseMerge(".ctors=.rdata");
parseMerge(".dtors=.rdata");
parseMerge(".CRT=.rdata");
+ parseMerge(".data_cygwin_nocopy=.data");
}
// Handle /section
@@ -2495,6 +2496,10 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
if (config->mingw) {
symtab.addAbsolute(symtab.mangle("__CTOR_LIST__"), 0);
symtab.addAbsolute(symtab.mangle("__DTOR_LIST__"), 0);
+ symtab.addAbsolute("__data_start__", 0);
+ symtab.addAbsolute("__data_end__", 0);
+ symtab.addAbsolute("__bss_start__", 0);
+ symtab.addAbsolute("__bss_end__", 0);
}
if (config->debug || config->buildIDHash != BuildIDHash::None)
if (symtab.findUnderscore("__buildid"))
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 6ed1f884a9636..32a480e3126e2 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -239,6 +239,7 @@ class Writer {
void createRuntimePseudoRelocs();
void createECChunks();
void insertCtorDtorSymbols();
+ void insertBssDataStartEndSymbols();
void markSymbolsWithRelocations(ObjFile *file, SymbolRVASet &usedSymbols);
void createGuardCFTables();
void markSymbolsForRVATable(ObjFile *file,
@@ -314,6 +315,7 @@ class Writer {
OutputSection *textSec;
OutputSection *hexpthkSec;
+ OutputSection *bssSec;
OutputSection *rdataSec;
OutputSection *buildidSec;
OutputSection *dataSec;
@@ -1077,7 +1079,7 @@ void Writer::createSections() {
textSec = createSection(".text", code | r | x);
if (isArm64EC(ctx.config.machine))
hexpthkSec = createSection(".hexpthk", code | r | x);
- createSection(".bss", bss | r | w);
+ bssSec = createSection(".bss", bss | r | w);
rdataSec = createSection(".rdata", data | r);
buildidSec = createSection(".buildid", data | r);
dataSec = createSection(".data", data | r | w);
@@ -1260,8 +1262,10 @@ void Writer::createMiscChunks() {
if (config->autoImport)
createRuntimePseudoRelocs();
- if (config->mingw)
+ if (config->mingw) {
insertCtorDtorSymbols();
+ insertBssDataStartEndSymbols();
+ }
}
// Create .idata section for the DLL-imported symbol table.
@@ -2369,6 +2373,31 @@ void Writer::insertCtorDtorSymbols() {
}
}
+// MinGW (really, Cygwin) specific.
+// The Cygwin startup code uses __data_start__ __data_end__ __bss_start__
+// and __bss_end__ to know what to copy during fork emulation.
+void Writer::insertBssDataStartEndSymbols() {
+ if (!dataSec->chunks.empty()) {
+ Symbol *dataStartSym = ctx.symtab.find("__data_start__");
+ Symbol *dataEndSym = ctx.symtab.find("__data_end__");
+ Chunk *endChunk = dataSec->chunks.back();
+ replaceSymbol<DefinedSynthetic>(dataStartSym, dataStartSym->getName(),
+ dataSec->chunks.front());
+ replaceSymbol<DefinedSynthetic>(dataEndSym, dataEndSym->getName(), endChunk,
+ endChunk->getSize());
+ }
+
+ if (!bssSec->chunks.empty()) {
+ Symbol *bssStartSym = ctx.symtab.find("__bss_start__");
+ Symbol *bssEndSym = ctx.symtab.find("__bss_end__");
+ Chunk *endChunk = bssSec->chunks.back();
+ replaceSymbol<DefinedSynthetic>(bssStartSym, bssStartSym->getName(),
+ bssSec->chunks.front());
+ replaceSymbol<DefinedSynthetic>(bssEndSym, bssEndSym->getName(), endChunk,
+ endChunk->getSize());
+ }
+}
+
// Handles /section options to allow users to overwrite
// section attributes.
void Writer::setSectionPermissions() {
diff --git a/lld/test/COFF/cygwin-symbols.s b/lld/test/COFF/cygwin-symbols.s
new file mode 100644
index 0000000000000..586790160220e
--- /dev/null
+++ b/lld/test/COFF/cygwin-symbols.s
@@ -0,0 +1,100 @@
+# REQUIRES: x86
+# RUN: split-file %s %t.dir && cd %t.dir
+
+# RUN: llvm-mc -triple=x86_64-windows-cygnus -filetype=obj -o data-no-bss.obj data-no-bss.s
+# RUN: lld-link -lldmingw -entry:main data-no-bss.obj -out:data-no-bss.exe
+# RUN: llvm-objdump -s data-no-bss.exe | FileCheck --check-prefix=DATANOBSS %s
+
+# RUN: llvm-mc -triple=x86_64-windows-cygnus -filetype=obj -o bss-no-data.obj bss-no-data.s
+# RUN: lld-link -lldmingw -entry:main bss-no-data.obj -out:bss-no-data.exe
+# RUN: llvm-objdump -s bss-no-data.exe | FileCheck --check-prefix=BSSNODATA %s
+
+# RUN: llvm-mc -triple=x86_64-windows-cygnus -filetype=obj -o data-and-bss.obj data-and-bss.s
+# RUN: lld-link -lldmingw -entry:main data-and-bss.obj -out:data-and-bss.exe
+# RUN: llvm-objdump -s data-and-bss.exe | FileCheck --check-prefix=DATAANDBSS %s
+
+#--- data-no-bss.s
+.globl main
+main:
+ nop
+
+.data
+ .quad 1
+ .byte 2
+
+.section .data_cygwin_nocopy, "w"
+ .align 4
+ .quad 3
+ .byte 4
+
+.section .test, "w"
+ .quad __data_start__
+ .quad __data_end__
+ .quad __bss_start__
+ .quad __bss_end__
+
+#--- bss-no-data.s
+.globl main
+main:
+ nop
+
+.bss
+ .quad 0
+ .byte 0
+
+.section .test, "w"
+ .quad __data_start__
+ .quad __data_end__
+ .quad __bss_start__
+ .quad __bss_end__
+
+#--- data-and-bss.s
+.globl main
+main:
+ nop
+
+.data
+ .quad 1
+ .byte 2
+
+.section .data_cygwin_nocopy, "w"
+ .align 4
+ .quad 3
+ .byte 4
+
+.bss
+ .quad 0
+ .byte 0
+
+.section .test, "w"
+ .quad __data_start__
+ .quad __data_end__
+ .quad __bss_start__
+ .quad __bss_end__
+
+# DATANOBSS: Contents of section .data:
+# DATANOBSS-NEXT: 140003000 01000000 00000000 02000000 03000000
+# DATANOBSS-NEXT: 140003010 00000000 04
+# __data_start__ pointing at 0x140003000 and
+# __data_end__ pointing at 0x140003009.
+# DATANOBSS-NEXT: Contents of section .test:
+# DATANOBSS-NEXT: 140004000 00300040 01000000 09300040 01000000
+# DATANOBSS-NEXT: 140004010 0c300040 01000000 0c300040 01000000
+
+# __bss_start__ pointing at 0x140003000 and
+# __bss_end__ pointing at 0x140003009.
+# BSSNODATA: Contents of section .test:
+# BSSNODATA-NEXT: 140004000 00300040 01000000 00300040 01000000
+# BSSNODATA-NEXT: 140004010 00300040 01000000 09300040 01000000
+
+# DATAANDBSS: Contents of section .data:
+# DATAANDBSS-NEXT: 140003000 01000000 00000000 02000000 00000000
+# DATAANDBSS-NEXT: 140003010 00000000 00000000 03000000 00000000
+# DATAANDBSS-NEXT: 140003020 04
+# __data_start__ pointing at 0x140003000 and
+# __data_end__ pointing at 0x140003009.
+# __bss_start__ pointing at 0x14000300c and
+# __bss_end__ pointing at 0x140003015.
+# DATAANDBSS-NEXT: Contents of section .test:
+# DATAANDBSS-NEXT: 140004000 00300040 01000000 09300040 01000000
+# DATAANDBSS-NEXT: 140004010 0c300040 01000000 15300040 01000000
From bae6a29ca7ce976552da2b5c3d9cdfaa7df29b97 Mon Sep 17 00:00:00 2001
From: Jeremy Drake <github at jdrake.com>
Date: Wed, 23 Apr 2025 14:30:05 -0700
Subject: [PATCH 2/2] [LLD][COFF] Ensure .bss is merged at the end of a
section.
Because .bss is full of zeros, it is expected that as much of it as
possible is elided from the actual image, and that cannot happen if
there is initialized data in the section after it.
Test this by having large .bss sections in the cygwin-symbols test.
---
lld/COFF/Writer.cpp | 52 ++++++++++++++++++++--------------
lld/test/COFF/cygwin-symbols.s | 29 +++++++++----------
2 files changed, 45 insertions(+), 36 deletions(-)
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 32a480e3126e2..5d2b66f064697 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -215,6 +215,7 @@ class Writer {
void appendImportThunks();
void locateImportTables();
void createExportTable();
+ void mergeSection(const std::map<StringRef, StringRef>::value_type &p);
void mergeSections();
void sortECChunks();
void appendECImportTables();
@@ -1571,6 +1572,30 @@ void Writer::createSymbolAndStringTable() {
fileSize = alignTo(fileOff, ctx.config.fileAlign);
}
+void Writer::mergeSection(const std::map<StringRef, StringRef>::value_type &p) {
+ StringRef toName = p.second;
+ if (p.first == toName)
+ return;
+ StringSet<> names;
+ while (true) {
+ if (!names.insert(toName).second)
+ Fatal(ctx) << "/merge: cycle found for section '" << p.first << "'";
+ auto i = ctx.config.merge.find(toName);
+ if (i == ctx.config.merge.end())
+ break;
+ toName = i->second;
+ }
+ OutputSection *from = findSection(p.first);
+ OutputSection *to = findSection(toName);
+ if (!from)
+ return;
+ if (!to) {
+ from->name = toName;
+ return;
+ }
+ to->merge(from);
+}
+
void Writer::mergeSections() {
llvm::TimeTraceScope timeScope("Merge sections");
if (!pdataSec->chunks.empty()) {
@@ -1599,28 +1624,13 @@ void Writer::mergeSections() {
}
for (auto &p : ctx.config.merge) {
- StringRef toName = p.second;
- if (p.first == toName)
- continue;
- StringSet<> names;
- while (true) {
- if (!names.insert(toName).second)
- Fatal(ctx) << "/merge: cycle found for section '" << p.first << "'";
- auto i = ctx.config.merge.find(toName);
- if (i == ctx.config.merge.end())
- break;
- toName = i->second;
- }
- OutputSection *from = findSection(p.first);
- OutputSection *to = findSection(toName);
- if (!from)
- continue;
- if (!to) {
- from->name = toName;
- continue;
- }
- to->merge(from);
+ if (p.first != ".bss")
+ mergeSection(p);
}
+
+ auto it = ctx.config.merge.find(".bss");
+ if (it != ctx.config.merge.end())
+ mergeSection(*it);
}
// EC targets may have chunks of various architectures mixed together at this
diff --git a/lld/test/COFF/cygwin-symbols.s b/lld/test/COFF/cygwin-symbols.s
index 586790160220e..d1aec4278425c 100644
--- a/lld/test/COFF/cygwin-symbols.s
+++ b/lld/test/COFF/cygwin-symbols.s
@@ -39,8 +39,7 @@ main:
nop
.bss
- .quad 0
- .byte 0
+ .zero 8192
.section .test, "w"
.quad __data_start__
@@ -63,8 +62,7 @@ main:
.byte 4
.bss
- .quad 0
- .byte 0
+ .zero 8192
.section .test, "w"
.quad __data_start__
@@ -79,22 +77,23 @@ main:
# __data_end__ pointing at 0x140003009.
# DATANOBSS-NEXT: Contents of section .test:
# DATANOBSS-NEXT: 140004000 00300040 01000000 09300040 01000000
-# DATANOBSS-NEXT: 140004010 0c300040 01000000 0c300040 01000000
+# DATANOBSS-NEXT: 140004010 18300040 01000000 18300040 01000000
# __bss_start__ pointing at 0x140003000 and
-# __bss_end__ pointing at 0x140003009.
+# __bss_end__ pointing at 0x140005000.
+# BSSNODATA-NOT: Contents of section .data:
# BSSNODATA: Contents of section .test:
-# BSSNODATA-NEXT: 140004000 00300040 01000000 00300040 01000000
-# BSSNODATA-NEXT: 140004010 00300040 01000000 09300040 01000000
+# BSSNODATA-NEXT: 140005000 00300040 01000000 00300040 01000000
+# BSSNODATA-NEXT: 140005010 00300040 01000000 00500040 01000000
# DATAANDBSS: Contents of section .data:
-# DATAANDBSS-NEXT: 140003000 01000000 00000000 02000000 00000000
-# DATAANDBSS-NEXT: 140003010 00000000 00000000 03000000 00000000
-# DATAANDBSS-NEXT: 140003020 04
+# DATAANDBSS-NEXT: 140003000 01000000 00000000 02000000 03000000
+# DATAANDBSS-NEXT: 140003010 00000000 04000000 00000000 00000000
# __data_start__ pointing at 0x140003000 and
# __data_end__ pointing at 0x140003009.
-# __bss_start__ pointing at 0x14000300c and
-# __bss_end__ pointing at 0x140003015.
+# __bss_start__ pointing at 0x140003018 and
+# __bss_end__ pointing at 0x140005018.
+# DATAANDBSS: 1400031f0 00000000 00000000 00000000 00000000
# DATAANDBSS-NEXT: Contents of section .test:
-# DATAANDBSS-NEXT: 140004000 00300040 01000000 09300040 01000000
-# DATAANDBSS-NEXT: 140004010 0c300040 01000000 15300040 01000000
+# DATAANDBSS-NEXT: 140006000 00300040 01000000 09300040 01000000
+# DATAANDBSS-NEXT: 140006010 18300040 01000000 18500040 01000000
More information about the llvm-commits
mailing list